coverity: mostly unsigned >= 0 comparisons
[valgrind.git] / VEX / priv / guest_arm_toIR.c
blob5e9e49568c2768fe74e2753f939f8b4db79ab347
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_arm_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 NEON support is
14 Copyright (C) 2010-2017 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, see <http://www.gnu.org/licenses/>.
31 The GNU General Public License is contained in the file COPYING.
34 /* XXXX thumb to check:
35 that all cases where putIRegT writes r15, we generate a jump.
37 All uses of newTemp assign to an IRTemp and not a UInt
39 For all thumb loads and stores, including VFP ones, new-ITSTATE is
40 backed out before the memory op, and restored afterwards. This
41 needs to happen even after we go uncond. (and for sure it doesn't
42 happen for VFP loads/stores right now).
44 VFP on thumb: check that we exclude all r13/r15 cases that we
45 should.
47 XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
48 taking into account the number of insns guarded by an IT.
50 remove the nasty hack, in the spechelper, of looking for Or32(...,
51 0xE0) in as the first arg to armg_calculate_condition, and instead
52 use Slice44 as specified in comments in the spechelper.
54 add specialisations for armg_calculate_flag_c and _v, as they
55 are moderately often needed in Thumb code.
57 Correctness: ITSTATE handling in Thumb SVCs is wrong.
59 Correctness (obscure): in m_transtab, when invalidating code
60 address ranges, invalidate up to 18 bytes after the end of the
61 range. This is because the ITSTATE optimisation at the top of
62 _THUMB_WRK below analyses up to 18 bytes before the start of any
63 given instruction, and so might depend on the invalidated area.
66 /* Limitations, etc
68 - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
69 These instructions are non-restartable in the case where the
70 transfer(s) fault.
72 - SWP: the restart jump back is Ijk_Boring; it should be
73 Ijk_NoRedir but that's expensive. See comments on casLE() in
74 guest_x86_toIR.c.
77 /* "Special" instructions.
79 This instruction decoder can decode four special instructions
80 which mean nothing natively (are no-ops as far as regs/mem are
81 concerned) but have meaning for supporting Valgrind. A special
82 instruction is flagged by a 16-byte preamble:
84 E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
85 (mov r12, r12, ROR #3; mov r12, r12, ROR #13;
86 mov r12, r12, ROR #29; mov r12, r12, ROR #19)
88 Following that, one of the following 3 are allowed
89 (standard interpretation in parentheses):
91 E18AA00A (orr r10,r10,r10) R3 = client_request ( R4 )
92 E18BB00B (orr r11,r11,r11) R3 = guest_NRADDR
93 E18CC00C (orr r12,r12,r12) branch-and-link-to-noredir R4
94 E1899009 (orr r9,r9,r9) IR injection
96 Any other bytes following the 16-byte preamble are illegal and
97 constitute a failure in instruction decoding. This all assumes
98 that the preamble will never occur except in specific code
99 fragments designed for Valgrind to catch.
102 /* Translates ARM(v5) code to IR. */
104 #include "libvex_basictypes.h"
105 #include "libvex_ir.h"
106 #include "libvex.h"
107 #include "libvex_guest_arm.h"
109 #include "main_util.h"
110 #include "main_globals.h"
111 #include "guest_generic_bb_to_IR.h"
112 #include "guest_arm_defs.h"
/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of a instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction. */

/* CONST: what is the host's endianness?  This has to do with float vs
   double register accesses on VFP, but it's complex and not properly
   thought out. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated.  This is the real, "decoded" address (not subject
   to the CPSR.T kludge). */
static Addr32 guest_R15_curr_instr_notENC;

/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
   insn is Thumb (True) or ARM (False). */
static Bool __curr_is_Thumb;

/* MOD: The IRSB* into which we're generating code. */
static IRSB* irsb;

/* These are to do with handling writes to r15.  They are initially
   set at the start of disInstr_ARM_WRK to indicate no update,
   possibly updated during the routine, and examined again at the end.
   If they have been set to indicate a r15 update then a jump is
   generated.  Note, "explicit" jumps (b, bx, etc) are generated
   directly, not using this mechanism -- this is intended to handle
   the implicit-style jumps resulting from (eg) assigning to r15 as
   the result of insns we wouldn't normally consider branchy. */

/* MOD.  Initially False; set to True iff abovementioned handling is
   required. */
static Bool r15written;

/* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
   is conditional, this holds the gating IRTemp :: Ity_I32.  If the
   branch to be generated is unconditional, this remains
   IRTemp_INVALID. */
static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */

/* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
   this holds the jump kind. */
/* NOTE(review): declared as IRTemp, but it is assigned jump-kind
   values (initially Ijk_Boring; putIRegA stores its IRJumpKind 'jk'
   into it) -- presumably the type should be IRJumpKind; verify. */
static IRTemp r15kind;

/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

/* Emit front-end trace output, only when VEX_TRACE_FE is enabled.
   NOTE(review): these are bare 'if' macros, not do{}while(0)-wrapped,
   so beware dangling-else hazards at use sites. */
#define DIP(format, args...)           \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_printf(format, ## args)

#define DIS(buf, format, args...)      \
   if (vex_traceflags & VEX_TRACE_FE)  \
      vex_sprintf(buf, format, ## args)

/* Assert that the instruction currently being translated is in the
   expected (Thumb / ARM) mode. */
#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)
184 /*------------------------------------------------------------*/
185 /*--- Helper bits and pieces for deconstructing the ---*/
186 /*--- arm insn stream. ---*/
187 /*------------------------------------------------------------*/
189 /* Do a little-endian load of a 32-bit word, regardless of the
190 endianness of the underlying host. */
191 static inline UInt getUIntLittleEndianly ( const UChar* p )
193 UInt w = 0;
194 w = (w << 8) | p[3];
195 w = (w << 8) | p[2];
196 w = (w << 8) | p[1];
197 w = (w << 8) | p[0];
198 return w;
201 /* Do a little-endian load of a 16-bit word, regardless of the
202 endianness of the underlying host. */
203 static inline UShort getUShortLittleEndianly ( const UChar* p )
205 UShort w = 0;
206 w = (w << 8) | p[1];
207 w = (w << 8) | p[0];
208 return w;
211 static UInt ROR32 ( UInt x, UInt sh ) {
212 vassert(sh >= 0 && sh < 32);
213 if (sh == 0)
214 return x;
215 else
216 return (x << (32-sh)) | (x >> sh);
219 static Int popcount32 ( UInt x )
221 Int res = 0, i;
222 for (i = 0; i < 32; i++) {
223 res += (x & 1);
224 x >>= 1;
226 return res;
229 static UInt setbit32 ( UInt x, Int ix, UInt b )
231 UInt mask = 1 << ix;
232 x &= ~mask;
233 x |= ((b << ix) & mask);
234 return x;
/* Build small bit-pattern constants from individual bit values,
   most-significant bit first.  Used throughout the decoder to write
   instruction-field match constants readably. */
#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

/* 5/6/7-bit patterns are expressed via BITS8 with leading zeroes. */
#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ( ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8) \
     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   ( ((_b11) << 11) | ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8) \
     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

/* produces _uint[_bMax:_bMin] -- i.e. extracts an inclusive bitfield.
   The 1ULL intermediate avoids overflow when the slice is 32 bits
   wide. */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
/*------------------------------------------------------------*/
/*--- Helper bits and pieces for creating IR fragments.    ---*/
/*------------------------------------------------------------*/

/* Make a 64-bit integer constant expression. */
static IRExpr* mkU64 ( ULong i )
{
   return IRExpr_Const(IRConst_U64(i));
}

/* Make a 32-bit integer constant expression. */
static IRExpr* mkU32 ( UInt i )
{
   return IRExpr_Const(IRConst_U32(i));
}

/* Make an 8-bit integer constant expression; 'i' must fit in 8 bits. */
static IRExpr* mkU8 ( UInt i )
{
   vassert(i < 256);
   return IRExpr_Const(IRConst_U8( (UChar)i ));
}

/* Read the current value of an IR temporary. */
static IRExpr* mkexpr ( IRTemp tmp )
{
   return IRExpr_RdTmp(tmp);
}

/* Shorthands for building operator applications. */
static IRExpr* unop ( IROp op, IRExpr* a )
{
   return IRExpr_Unop(op, a);
}

static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
{
   return IRExpr_Binop(op, a1, a2);
}

static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
{
   return IRExpr_Triop(op, a1, a2, a3);
}

/* Little-endian load expression of the given type from 'addr'. */
static IRExpr* loadLE ( IRType ty, IRExpr* addr )
{
   return IRExpr_Load(Iend_LE, ty, addr);
}

/* Add a statement to the list held by "irbb".
   NOTE(review): comment mentions "irbb" but the statement is appended
   to the global 'irsb'; the name in the comment looks stale. */
static void stmt ( IRStmt* st )
{
   addStmtToIRSB( irsb, st );
}

/* Assign expression 'e' to temporary 'dst'. */
static void assign ( IRTemp dst, IRExpr* e )
{
   stmt( IRStmt_WrTmp(dst, e) );
}

/* Unconditional little-endian store of 'data' at 'addr'. */
static void storeLE ( IRExpr* addr, IRExpr* data )
{
   stmt( IRStmt_Store(Iend_LE, addr, data) );
}
/* Store 'data' at 'addr', guarded by 'guardT'.  If guardT ==
   IRTemp_INVALID the store is unconditional; otherwise it takes
   place only when the (0-or-1) value in guardT is nonzero. */
static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
{
   if (guardT == IRTemp_INVALID) {
      /* unconditional */
      storeLE(addr, data);
   } else {
      stmt( IRStmt_StoreG(Iend_LE, addr, data,
                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
   }
}
/* Load into 'dst' from 'addr', applying conversion 'cvt' to the
   loaded value, guarded by 'guardT'.  If guardT == IRTemp_INVALID the
   load is unconditional; otherwise, when the guard evaluates to zero,
   'alt' is placed in 'dst' instead of the loaded value. */
static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
                            IRExpr* addr, IRExpr* alt,
                            IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
{
   if (guardT == IRTemp_INVALID) {
      /* unconditional: expand 'cvt' into an explicit load+widen. */
      IRExpr* loaded = NULL;
      switch (cvt) {
         case ILGop_Ident32:
            loaded = loadLE(Ity_I32, addr); break;
         case ILGop_8Uto32:
            loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
         case ILGop_8Sto32:
            loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
         case ILGop_16Uto32:
            loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
         case ILGop_16Sto32:
            loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
         default:
            vassert(0);
      }
      vassert(loaded != NULL);
      assign(dst, loaded);
   } else {
      /* Generate a guarded load into 'dst', but apply 'cvt' to the
         loaded data before putting the data in 'dst'.  If the load
         does not take place, 'alt' is placed directly in 'dst'. */
      stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
                         binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
   }
}
/* Generate a new temporary of the given type. */
static IRTemp newTemp ( IRType ty )
{
   vassert(isPlausibleIRType(ty));
   return newIRTemp( irsb->tyenv, ty );
}
/* Produces a value in 0 .. 3, which is encoded as per the type
   IRRoundingMode.  "FAKE" because it always returns round-to-nearest
   rather than consulting the guest FPSCR. */
static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
{
   return mkU32(Irrm_NEAREST);
}
/* Generate an expression for SRC rotated right by ROT. */
static IRExpr* genROR32( IRTemp src, Int rot )
{
   vassert(rot >= 0 && rot < 32);
   if (rot == 0)
      return mkexpr(src);
   /* (src << (32-rot)) | (src >> rot); rot==0 is excluded above so
      neither shift amount can be 32. */
   return
      binop(Iop_Or32,
            binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
            binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
}
/* Make a 128-bit constant expression with both 64-bit halves = 'i'. */
static IRExpr* mkU128 ( ULong i )
{
   return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
}
/* Generate a 4-aligned version of the given expression if
   the given condition is true.  Else return it unchanged. */
static IRExpr* align4if ( IRExpr* e, Bool b )
{
   if (b)
      return binop(Iop_And32, e, mkU32(~3));
   else
      return e;
}
/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

/* Byte offsets of each guest register within VexGuestARMState.
   All guest-state Puts/Gets below are expressed via these. */

#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

/* Thunk fields for lazy condition-code evaluation. */
#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

/* VFP/Neon double registers D0..D31. */
#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

/* Misc status/system registers and per-bit GE flags. */
#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_TPIDRURW offsetof(VexGuestARMState,guest_TPIDRURW)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

/* Cache-management (translation invalidation) range. */
#define OFFB_CMSTART  offsetof(VexGuestARMState,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARMState,guest_CMLEN)
499 /* ---------------- Integer registers ---------------- */
501 static Int integerGuestRegOffset ( UInt iregNo )
503 /* Do we care about endianness here? We do if sub-parts of integer
504 registers are accessed, but I don't think that ever happens on
505 ARM. */
506 switch (iregNo) {
507 case 0: return OFFB_R0;
508 case 1: return OFFB_R1;
509 case 2: return OFFB_R2;
510 case 3: return OFFB_R3;
511 case 4: return OFFB_R4;
512 case 5: return OFFB_R5;
513 case 6: return OFFB_R6;
514 case 7: return OFFB_R7;
515 case 8: return OFFB_R8;
516 case 9: return OFFB_R9;
517 case 10: return OFFB_R10;
518 case 11: return OFFB_R11;
519 case 12: return OFFB_R12;
520 case 13: return OFFB_R13;
521 case 14: return OFFB_R14;
522 case 15: return OFFB_R15T;
523 default: vassert(0);
/* Plain ("low level") read from a reg; no +8 offset magic for r15. */
static IRExpr* llGetIReg ( UInt iregNo )
{
   vassert(iregNo < 16);
   return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
}
/* Architected read from a reg in ARM mode.  This automagically adds 8
   to all reads of r15. */
static IRExpr* getIRegA ( UInt iregNo )
{
   IRExpr* e;
   ASSERT_IS_ARM;
   vassert(iregNo < 16);
   if (iregNo == 15) {
      /* If asked for r15, don't read the guest state value, as that
         may not be up to date in the case where loop unrolling has
         happened, because the first insn's write to the block is
         omitted; hence in the 2nd and subsequent unrollings we don't
         have a correct value in guest r15.  Instead produce the
         constant that we know would be produced at this point. */
      vassert(0 == (guest_R15_curr_instr_notENC & 3));
      e = mkU32(guest_R15_curr_instr_notENC + 8);
   } else {
      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
   }
   return e;
}
/* Architected read from a reg in Thumb mode.  This automagically adds
   4 to all reads of r15. */
static IRExpr* getIRegT ( UInt iregNo )
{
   IRExpr* e;
   ASSERT_IS_THUMB;
   vassert(iregNo < 16);
   if (iregNo == 15) {
      /* Ditto comment in getIReg. */
      vassert(0 == (guest_R15_curr_instr_notENC & 1));
      e = mkU32(guest_R15_curr_instr_notENC + 4);
   } else {
      e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
   }
   return e;
}
/* Plain ("low level") write to a reg; no jump or alignment magic for
   r15. */
static void llPutIReg ( UInt iregNo, IRExpr* e )
{
   vassert(iregNo < 16);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
   stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
}
/* Architected write to an integer register in ARM mode.  If it is to
   r15, record info so at the end of this insn's translation, a branch
   to it can be made.  Also handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional.  If writing r15, also 4-align it. */
static void putIRegA ( UInt iregNo,
                       IRExpr* e,
                       IRTemp guardT /* :: Ity_I32, 0 or 1 */,
                       IRJumpKind jk /* if a jump is generated */ )
{
   /* if writing r15, force e to be 4-aligned. */
   // INTERWORKING FIXME.  this needs to be relaxed so that
   // puts caused by LDMxx which load r15 interwork right.
   // but is no aligned too relaxed?
   //if (iregNo == 15)
   //   e = binop(Iop_And32, e, mkU32(~3));
   ASSERT_IS_ARM;
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutIReg( iregNo, e );
   } else {
      llPutIReg( iregNo,
                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
                             e, llGetIReg(iregNo) ));
   }
   if (iregNo == 15) {
      // assert against competing r15 updates.  Shouldn't
      // happen; should be ruled out by the instr matching
      // logic.
      vassert(r15written == False);
      vassert(r15guard   == IRTemp_INVALID);
      vassert(r15kind    == Ijk_Boring);
      r15written = True;
      r15guard   = guardT;
      r15kind    = jk;
   }
}
/* Architected write to an integer register in Thumb mode.  Writes to
   r15 are not allowed.  Handles conditional writes to the register:
   if guardT == IRTemp_INVALID then the write is unconditional. */
static void putIRegT ( UInt iregNo,
                       IRExpr* e,
                       IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   ASSERT_IS_THUMB;
   vassert(iregNo <= 14);   /* r15 writes are rejected here */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutIReg( iregNo, e );
   } else {
      llPutIReg( iregNo,
                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
                             e, llGetIReg(iregNo) ));
   }
}
645 /* Thumb16 and Thumb32 only.
646 Returns true if reg is 13 or 15. Implements the BadReg
647 predicate in the ARM ARM. */
648 static Bool isBadRegT ( UInt r )
650 vassert(r <= 15);
651 ASSERT_IS_THUMB;
652 return r == 13 || r == 15;
656 /* ---------------- Double registers ---------------- */
658 static Int doubleGuestRegOffset ( UInt dregNo )
660 /* Do we care about endianness here? Probably do if we ever get
661 into the situation of dealing with the single-precision VFP
662 registers. */
663 switch (dregNo) {
664 case 0: return OFFB_D0;
665 case 1: return OFFB_D1;
666 case 2: return OFFB_D2;
667 case 3: return OFFB_D3;
668 case 4: return OFFB_D4;
669 case 5: return OFFB_D5;
670 case 6: return OFFB_D6;
671 case 7: return OFFB_D7;
672 case 8: return OFFB_D8;
673 case 9: return OFFB_D9;
674 case 10: return OFFB_D10;
675 case 11: return OFFB_D11;
676 case 12: return OFFB_D12;
677 case 13: return OFFB_D13;
678 case 14: return OFFB_D14;
679 case 15: return OFFB_D15;
680 case 16: return OFFB_D16;
681 case 17: return OFFB_D17;
682 case 18: return OFFB_D18;
683 case 19: return OFFB_D19;
684 case 20: return OFFB_D20;
685 case 21: return OFFB_D21;
686 case 22: return OFFB_D22;
687 case 23: return OFFB_D23;
688 case 24: return OFFB_D24;
689 case 25: return OFFB_D25;
690 case 26: return OFFB_D26;
691 case 27: return OFFB_D27;
692 case 28: return OFFB_D28;
693 case 29: return OFFB_D29;
694 case 30: return OFFB_D30;
695 case 31: return OFFB_D31;
696 default: vassert(0);
/* Plain ("low level") read from a VFP Dreg. */
static IRExpr* llGetDReg ( UInt dregNo )
{
   vassert(dregNo < 32);
   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
}

/* Architected read from a VFP Dreg.  Currently identical to the
   low-level read. */
static IRExpr* getDReg ( UInt dregNo ) {
   return llGetDReg( dregNo );
}
/* Plain ("low level") write to a VFP Dreg. */
static void llPutDReg ( UInt dregNo, IRExpr* e )
{
   vassert(dregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
}

/* Architected write to a VFP Dreg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putDReg ( UInt dregNo,
                      IRExpr* e,
                      IRTemp guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutDReg( dregNo, e );
   } else {
      llPutDReg( dregNo,
                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
                             e, llGetDReg(dregNo) ));
   }
}
/* And now exactly the same stuff all over again, but this time
   taking/returning I64 rather than F64, to support 64-bit Neon
   ops. */

/* Plain ("low level") read from a Neon Integer Dreg. */
static IRExpr* llGetDRegI64 ( UInt dregNo )
{
   vassert(dregNo < 32);
   return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
}

/* Architected read from a Neon Integer Dreg.  Currently identical to
   the low-level read. */
static IRExpr* getDRegI64 ( UInt dregNo ) {
   return llGetDRegI64( dregNo );
}

/* Plain ("low level") write to a Neon Integer Dreg. */
static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
{
   vassert(dregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
   stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
}

/* Architected write to a Neon Integer Dreg.  Handles conditional
   writes to the register: if guardT == IRTemp_INVALID then the write
   is unconditional. */
static void putDRegI64 ( UInt dregNo,
                         IRExpr* e,
                         IRTemp guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutDRegI64( dregNo, e );
   } else {
      llPutDRegI64( dregNo,
                    IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
                                e, llGetDRegI64(dregNo) ));
   }
}
782 /* ---------------- Quad registers ---------------- */
784 static Int quadGuestRegOffset ( UInt qregNo )
786 /* Do we care about endianness here? Probably do if we ever get
787 into the situation of dealing with the 64 bit Neon registers. */
788 switch (qregNo) {
789 case 0: return OFFB_D0;
790 case 1: return OFFB_D2;
791 case 2: return OFFB_D4;
792 case 3: return OFFB_D6;
793 case 4: return OFFB_D8;
794 case 5: return OFFB_D10;
795 case 6: return OFFB_D12;
796 case 7: return OFFB_D14;
797 case 8: return OFFB_D16;
798 case 9: return OFFB_D18;
799 case 10: return OFFB_D20;
800 case 11: return OFFB_D22;
801 case 12: return OFFB_D24;
802 case 13: return OFFB_D26;
803 case 14: return OFFB_D28;
804 case 15: return OFFB_D30;
805 default: vassert(0);
/* Plain ("low level") read from a Neon Qreg. */
static IRExpr* llGetQReg ( UInt qregNo )
{
   vassert(qregNo < 16);
   return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
}

/* Architected read from a Neon Qreg.  Currently identical to the
   low-level read. */
static IRExpr* getQReg ( UInt qregNo ) {
   return llGetQReg( qregNo );
}

/* Plain ("low level") write to a Neon Qreg. */
static void llPutQReg ( UInt qregNo, IRExpr* e )
{
   vassert(qregNo < 16);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
   stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
}

/* Architected write to a Neon Qreg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putQReg ( UInt qregNo,
                      IRExpr* e,
                      IRTemp guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutQReg( qregNo, e );
   } else {
      llPutQReg( qregNo,
                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
                             e, llGetQReg(qregNo) ));
   }
}
/* ---------------- Float registers ---------------- */

static Int floatGuestRegOffset ( UInt fregNo )
{
   /* Start with the offset of the containing double, and then correct
      for endianness.  Actually this is completely bogus and needs
      careful thought. */
   Int off;
   /* NB! Limit is 64, not 32, because we might be pulling F32 bits
      out of SIMD registers, and there are 16 SIMD registers each of
      128 bits (4 x F32). */
   vassert(fregNo < 64);
   off = doubleGuestRegOffset(fregNo >> 1);
   if (host_endness == VexEndnessLE) {
      /* Odd-numbered F regs live in the upper half of their D reg. */
      if (fregNo & 1)
         off += 4;
   } else {
      /* Big-endian hosts are not handled here. */
      vassert(0);
   }
   return off;
}
/* Plain ("low level") read from a VFP Freg (F0..F31). */
static IRExpr* llGetFReg ( UInt fregNo )
{
   vassert(fregNo < 32);
   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
}

/* Same, but accepts the extended range 0..63 so F32 lanes of the
   16 Q registers can be read. */
static IRExpr* llGetFReg_up_to_64 ( UInt fregNo )
{
   vassert(fregNo < 64);
   return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
}

/* Architected read from a VFP Freg.  Currently identical to the
   low-level read. */
static IRExpr* getFReg ( UInt fregNo ) {
   return llGetFReg( fregNo );
}

/* Plain ("low level") write to a VFP Freg. */
static void llPutFReg ( UInt fregNo, IRExpr* e )
{
   vassert(fregNo < 32);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
}

/* Same, but accepts the extended range 0..63 (see above). */
static void llPutFReg_up_to_64 ( UInt fregNo, IRExpr* e )
{
   vassert(fregNo < 64);
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
   stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
}

/* Architected write to a VFP Freg.  Handles conditional writes to the
   register: if guardT == IRTemp_INVALID then the write is
   unconditional. */
static void putFReg ( UInt fregNo,
                      IRExpr* e,
                      IRTemp guardT /* :: Ity_I32, 0 or 1 */)
{
   /* So, generate either an unconditional or a conditional write to
      the reg. */
   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      llPutFReg( fregNo, e );
   } else {
      llPutFReg( fregNo,
                 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
                             e, llGetFReg(fregNo) ));
   }
}
/* ---------------- Misc registers ---------------- */

/* Conditionally write 'e' (:: Ity_I32) to the guest-state field at
   offset 'gsoffset'.  Only the fields listed in the switch may be
   written this way; anything else asserts. */
static void putMiscReg32 ( UInt    gsoffset,
                           IRExpr* e, /* :: Ity_I32 */
                           IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
{
   /* Whitelist of writable misc fields. */
   switch (gsoffset) {
      case OFFB_FPSCR:   break;
      case OFFB_QFLAG32: break;
      case OFFB_GEFLAG0: break;
      case OFFB_GEFLAG1: break;
      case OFFB_GEFLAG2: break;
      case OFFB_GEFLAG3: break;
      case OFFB_TPIDRURW: break;
      default: vassert(0); /* awaiting more cases */
   }
   vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);

   if (guardT == IRTemp_INVALID) {
      /* unconditional write */
      stmt(IRStmt_Put(gsoffset, e));
   } else {
      stmt(IRStmt_Put(
         gsoffset,
         IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
                     e, IRExpr_Get(gsoffset, Ity_I32) )
      ));
   }
}
/* Read ITSTATE into a fresh temporary.  Thumb only. */
static IRTemp get_ITSTATE ( void )
{
   ASSERT_IS_THUMB;
   IRTemp t = newTemp(Ity_I32);
   assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
   return t;
}

/* Write ITSTATE from the given temporary.  Thumb only. */
static void put_ITSTATE ( IRTemp t )
{
   ASSERT_IS_THUMB;
   stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
}

/* Read the sticky Q flag (zero / nonzero representation) into a
   fresh temporary. */
static IRTemp get_QFLAG32 ( void )
{
   IRTemp t = newTemp(Ity_I32);
   assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
   return t;
}
/* Conditionally write the Q flag (zero / nonzero representation). */
static void put_QFLAG32 ( IRTemp t, IRTemp condT )
{
   putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
}

/* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
   Status Register) to indicate that overflow or saturation occurred.
   Nb: t must be zero to denote no saturation, and any nonzero
   value to indicate saturation. */
static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
{
   /* OR-ing preserves any previously-set (sticky) saturation. */
   IRTemp old = get_QFLAG32();
   IRTemp nyu = newTemp(Ity_I32);
   assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
   put_QFLAG32(nyu, condT);
}
/* Generate code to set APSR.GE[flagNo].  Each fn call sets 1 bit.
   flagNo: which flag bit to set [3...0]
   lowbits_to_ignore:  0 = look at all 32 bits
                       8 = look at top 24 bits only
                      16 = look at top 16 bits only
                      31 = look at the top bit only
   e: input value to be evaluated.
   The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
   masked out.  If the resulting value is zero then the GE flag is
   set to 0; any other value sets the flag to 1. */
static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
                           Int lowbits_to_ignore, /* 0, 8, 16 or 31 */
                           IRExpr* e,             /* Ity_I32 */
                           IRTemp condT )
{
   vassert( flagNo >= 0 && flagNo <= 3 );
   vassert( lowbits_to_ignore == 0  ||
            lowbits_to_ignore == 8  ||
            lowbits_to_ignore == 16 ||
            lowbits_to_ignore == 31 );
   /* Shifting right discards the ignored low bits; the flag uses a
      zero / nonzero representation, so no further normalisation is
      needed. */
   IRTemp masked = newTemp(Ity_I32);
   assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));

   switch (flagNo) {
      case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
      case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
      case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
      case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
      default: vassert(0);
   }
}
1024 /* Return the (32-bit, zero-or-nonzero representation scheme) of
1025 the specified GE flag. */
1026 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
1028 switch (flagNo) {
1029 case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
1030 case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
1031 case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
1032 case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
1033 default: vassert(0);
1037 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
1038 2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
1039 15 of the value. All other bits are ignored. */
1040 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
1042 IRTemp ge10 = newTemp(Ity_I32);
1043 IRTemp ge32 = newTemp(Ity_I32);
1044 assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1045 assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1046 put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
1047 put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
1048 put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
1049 put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
1053 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3
1054 from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
1055 bit 7. All other bits are ignored. */
1056 static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
1058 IRTemp ge0 = newTemp(Ity_I32);
1059 IRTemp ge1 = newTemp(Ity_I32);
1060 IRTemp ge2 = newTemp(Ity_I32);
1061 IRTemp ge3 = newTemp(Ity_I32);
1062 assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
1063 assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1064 assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
1065 assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1066 put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
1067 put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
1068 put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
1069 put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
1073 /* ---------------- FPSCR stuff ---------------- */
1075 /* Generate IR to get hold of the rounding mode bits in FPSCR, and
1076 convert them to IR format. Bind the final result to the
1077 returned temp. */
1078 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1080 /* The ARMvfp encoding for rounding mode bits is:
1081 00 to nearest
1082 01 to +infinity
1083 10 to -infinity
1084 11 to zero
1085 We need to convert that to the IR encoding:
1086 00 to nearest (the default)
1087 10 to +infinity
1088 01 to -infinity
1089 11 to zero
1090 Which can be done by swapping bits 0 and 1.
1091 The rmode bits are at 23:22 in FPSCR.
1093 IRTemp armEncd = newTemp(Ity_I32);
1094 IRTemp swapped = newTemp(Ity_I32);
1095 /* Fish FPSCR[23:22] out, and slide to bottom. Doesn't matter that
1096 we don't zero out bits 24 and above, since the assignment to
1097 'swapped' will mask them out anyway. */
1098 assign(armEncd,
1099 binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
1100 /* Now swap them. */
1101 assign(swapped,
1102 binop(Iop_Or32,
1103 binop(Iop_And32,
1104 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1105 mkU32(2)),
1106 binop(Iop_And32,
1107 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1108 mkU32(1))
1110 return swapped;
1114 /*------------------------------------------------------------*/
1115 /*--- Helpers for flag handling and conditional insns ---*/
1116 /*------------------------------------------------------------*/
1118 static const HChar* name_ARMCondcode ( ARMCondcode cond )
1120 switch (cond) {
1121 case ARMCondEQ: return "{eq}";
1122 case ARMCondNE: return "{ne}";
1123 case ARMCondHS: return "{hs}"; // or 'cs'
1124 case ARMCondLO: return "{lo}"; // or 'cc'
1125 case ARMCondMI: return "{mi}";
1126 case ARMCondPL: return "{pl}";
1127 case ARMCondVS: return "{vs}";
1128 case ARMCondVC: return "{vc}";
1129 case ARMCondHI: return "{hi}";
1130 case ARMCondLS: return "{ls}";
1131 case ARMCondGE: return "{ge}";
1132 case ARMCondLT: return "{lt}";
1133 case ARMCondGT: return "{gt}";
1134 case ARMCondLE: return "{le}";
1135 case ARMCondAL: return ""; // {al}: is the default
1136 case ARMCondNV: return "{nv}";
1137 default: vpanic("name_ARMCondcode");
1140 /* and a handy shorthand for it */
/* Used pervasively by the disassembly printers below. */
1141 static const HChar* nCC ( ARMCondcode cond ) {
1142 return name_ARMCondcode(cond);
1146 /* Build IR to calculate some particular condition from stored
1147 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1148 Ity_I32, suitable for narrowing. Although the return type is
1149 Ity_I32, the returned value is either 0 or 1. 'cond' must be
1150 :: Ity_I32 and must denote the condition to compute in
1151 bits 7:4, and be zero everywhere else.
1153 static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
1155 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
1156 /* And 'cond' had better produce a value in which only bits 7:4 are
1157 nonzero. However, obviously we can't assert for that. */
1159 /* So what we're constructing for the first argument is
1160 "(cond << 4) | stored-operation".
1161 However, as per comments above, 'cond' must be supplied
1162 pre-shifted to this function.
1164 This pairing scheme requires that the ARM_CC_OP_ values all fit
1165 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1166 8 bits of the first argument. */
1167 IRExpr** args
1168 = mkIRExprVec_4(
1169 binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
1170 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1171 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1172 IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
1174 IRExpr* call
1175 = mkIRExprCCall(
1176 Ity_I32,
1177 0/*regparm*/,
1178 "armg_calculate_condition", &armg_calculate_condition,
1179 args
1182 /* Exclude the requested condition, OP and NDEP from definedness
1183 checking. We're only interested in DEP1 and DEP2. */
/* mcx_mask: bit i set tells Memcheck to ignore undefinedness of
   ccall argument i — here arg 0 (cond|op) and arg 3 (NDEP). */
1184 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1185 return call;
1189 /* Build IR to calculate some particular condition from stored
1190 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1191 Ity_I32, suitable for narrowing. Although the return type is
1192 Ity_I32, the returned value is either 0 or 1.
1194 static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
1196 /* First arg is "(cond << 4) | condition". This requires that the
1197 ARM_CC_OP_ values all fit in 4 bits. Hence we are passing a
1198 (COND, OP) pair in the lowest 8 bits of the first argument. */
/* NOTE(review): if ARMCondcode is an unsigned enum type, the
   'cond >= 0' half is vacuous (coverity: unsigned >= 0 comparison);
   kept for readability, mirroring the cc_op check elsewhere. */
1199 vassert(cond >= 0 && cond <= 15);
1200 return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
1204 /* Build IR to calculate just the carry flag from stored
1205 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1206 Ity_I32. */
1207 static IRExpr* mk_armg_calculate_flag_c ( void )
1209 IRExpr** args
1210 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1211 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1212 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1213 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1214 IRExpr* call
1215 = mkIRExprCCall(
1216 Ity_I32,
1217 0/*regparm*/,
1218 "armg_calculate_flag_c", &armg_calculate_flag_c,
1219 args
1221 /* Exclude OP and NDEP from definedness checking. We're only
1222 interested in DEP1 and DEP2. */
/* mcx_mask bit i => Memcheck ignores undefinedness of ccall arg i. */
1223 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1224 return call;
1228 /* Build IR to calculate just the overflow flag from stored
1229 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1230 Ity_I32. */
1231 static IRExpr* mk_armg_calculate_flag_v ( void )
1233 IRExpr** args
1234 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1235 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1236 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1237 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1238 IRExpr* call
1239 = mkIRExprCCall(
1240 Ity_I32,
1241 0/*regparm*/,
1242 "armg_calculate_flag_v", &armg_calculate_flag_v,
1243 args
1245 /* Exclude OP and NDEP from definedness checking. We're only
1246 interested in DEP1 and DEP2. */
/* mcx_mask bit i => Memcheck ignores undefinedness of ccall arg i. */
1247 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1248 return call;
1252 /* Build IR to calculate N Z C V in bits 31:28 of the
1253 returned word. */
1254 static IRExpr* mk_armg_calculate_flags_nzcv ( void )
1256 IRExpr** args
1257 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1258 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1259 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1260 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1261 IRExpr* call
1262 = mkIRExprCCall(
1263 Ity_I32,
1264 0/*regparm*/,
1265 "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
1266 args
1268 /* Exclude OP and NDEP from definedness checking. We're only
1269 interested in DEP1 and DEP2. */
/* mcx_mask bit i => Memcheck ignores undefinedness of ccall arg i. */
1270 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1271 return call;
/* Build IR computing a zero/nonzero :: Ity_I32 QC (cumulative
   saturation) indication from two vector results.  Q=True means resL
   and resR are 128-bit (four 32-bit lanes each, needing two helper
   calls); Q=False means 64-bit (two lanes, one call).  The lanes are
   fed to the armg_calculate_flag_qc helper — presumably it compares
   corresponding lanes of resL and resR; see its definition. */
1274 static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
1276 IRExpr** args1;
1277 IRExpr** args2;
1278 IRExpr *call1, *call2, *res;
1280 if (Q) {
1281 args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
1282 binop(Iop_GetElem32x4, resL, mkU8(1)),
1283 binop(Iop_GetElem32x4, resR, mkU8(0)),
1284 binop(Iop_GetElem32x4, resR, mkU8(1)) );
1285 args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
1286 binop(Iop_GetElem32x4, resL, mkU8(3)),
1287 binop(Iop_GetElem32x4, resR, mkU8(2)),
1288 binop(Iop_GetElem32x4, resR, mkU8(3)) );
1289 } else {
1290 args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
1291 binop(Iop_GetElem32x2, resL, mkU8(1)),
1292 binop(Iop_GetElem32x2, resR, mkU8(0)),
1293 binop(Iop_GetElem32x2, resR, mkU8(1)) );
1296 call1 = mkIRExprCCall(
1297 Ity_I32,
1298 0/*regparm*/,
1299 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1300 args1
/* call2 (and args2) are only assigned and used on the Q path. */
1302 if (Q) {
1303 call2 = mkIRExprCCall(
1304 Ity_I32,
1305 0/*regparm*/,
1306 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1307 args2
1310 if (Q) {
1311 res = binop(Iop_Or32, call1, call2);
1312 } else {
1313 res = call1;
1315 return res;
1318 // FIXME: this is named wrongly .. looks like a sticky set of
1319 // QC, not a write to it.
/* OR the computed QC indication into FPSCR at bit 27 (guarded by
   condT), so the flag accumulates and is never cleared here. */
1320 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
1321 IRTemp condT )
1323 putMiscReg32 (OFFB_FPSCR,
1324 binop(Iop_Or32,
1325 IRExpr_Get(OFFB_FPSCR, Ity_I32),
1326 binop(Iop_Shl32,
1327 mk_armg_calculate_flag_qc(resL, resR, Q),
1328 mkU8(27))),
1329 condT);
1332 /* Build IR to conditionally set the flags thunk. As with putIReg, if
1333 guard is IRTemp_INVALID then it's unconditional, else it holds a
1334 condition :: Ity_I32. */
1335 static
1336 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
1337 IRTemp t_dep2, IRTemp t_ndep,
1338 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1340 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
1341 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
1342 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
1343 // strictly unsigned cc_op must always be >= 0, keeping for readability
1344 vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
1345 if (guardT == IRTemp_INVALID) {
1346 /* unconditional */
1347 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(cc_op) ));
1348 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1349 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1350 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1351 } else {
1352 /* conditional */
1353 IRTemp c1 = newTemp(Ity_I1);
1354 assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
1355 stmt( IRStmt_Put(
1356 OFFB_CC_OP,
1357 IRExpr_ITE( mkexpr(c1),
1358 mkU32(cc_op),
1359 IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
1360 stmt( IRStmt_Put(
1361 OFFB_CC_DEP1,
1362 IRExpr_ITE( mkexpr(c1),
1363 mkexpr(t_dep1),
1364 IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
1365 stmt( IRStmt_Put(
1366 OFFB_CC_DEP2,
1367 IRExpr_ITE( mkexpr(c1),
1368 mkexpr(t_dep2),
1369 IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
1370 stmt( IRStmt_Put(
1371 OFFB_CC_NDEP,
1372 IRExpr_ITE( mkexpr(c1),
1373 mkexpr(t_ndep),
1374 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
1379 /* Minor variant of the above that sets NDEP to zero (if it
1380 sets it at all) */
1381 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1382 IRTemp t_dep2,
1383 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1385 IRTemp z32 = newTemp(Ity_I32);
1386 assign( z32, mkU32(0) );
1387 setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1391 /* Minor variant of the above that sets DEP2 to zero (if it
1392 sets it at all) */
1393 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1394 IRTemp t_ndep,
1395 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1397 IRTemp z32 = newTemp(Ity_I32);
1398 assign( z32, mkU32(0) );
1399 setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1403 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1404 sets them at all) */
1405 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1406 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1408 IRTemp z32 = newTemp(Ity_I32);
1409 assign( z32, mkU32(0) );
1410 setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1414 /* ARM only */
1415 /* Generate a side-exit to the next instruction, if the given guard
1416 expression :: Ity_I32 is 0 (note! the side exit is taken if the
1417 condition is false!) This is used to skip over conditional
1418 instructions which we can't generate straight-line code for, either
1419 because they are too complex or (more likely) they potentially
1420 generate exceptions.
1422 static void mk_skip_over_A32_if_cond_is_false (
1423 IRTemp guardT /* :: Ity_I32, 0 or 1 */
1426 ASSERT_IS_ARM;
1427 vassert(guardT != IRTemp_INVALID);
1428 vassert(0 == (guest_R15_curr_instr_notENC & 3));
1429 stmt( IRStmt_Exit(
1430 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1431 Ijk_Boring,
1432 IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
1433 OFFB_R15T
1437 /* Thumb16 only */
1438 /* ditto, but jump over a 16-bit thumb insn */
1439 static void mk_skip_over_T16_if_cond_is_false (
1440 IRTemp guardT /* :: Ity_I32, 0 or 1 */
1443 ASSERT_IS_THUMB;
1444 vassert(guardT != IRTemp_INVALID);
1445 vassert(0 == (guest_R15_curr_instr_notENC & 1));
1446 stmt( IRStmt_Exit(
1447 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1448 Ijk_Boring,
1449 IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
1450 OFFB_R15T
1455 /* Thumb32 only */
1456 /* ditto, but jump over a 32-bit thumb insn */
1457 static void mk_skip_over_T32_if_cond_is_false (
1458 IRTemp guardT /* :: Ity_I32, 0 or 1 */
1461 ASSERT_IS_THUMB;
1462 vassert(guardT != IRTemp_INVALID);
1463 vassert(0 == (guest_R15_curr_instr_notENC & 1));
1464 stmt( IRStmt_Exit(
1465 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1466 Ijk_Boring,
1467 IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
1468 OFFB_R15T
1473 /* Thumb16 and Thumb32 only
1474 Generate a SIGILL followed by a restart of the current instruction
1475 if the given temp is nonzero. */
1476 static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
1478 ASSERT_IS_THUMB;
1479 vassert(t != IRTemp_INVALID);
1480 vassert(0 == (guest_R15_curr_instr_notENC & 1));
1481 stmt(
1482 IRStmt_Exit(
1483 binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
1484 Ijk_NoDecode,
1485 IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
1486 OFFB_R15T
1492 /* Inspect the old_itstate, and generate a SIGILL if it indicates that
1493 we are currently in an IT block and are not the last in the block.
1494 This also rolls back guest_ITSTATE to its old value before the exit
1495 and restores it to its new value afterwards. This is so that if
1496 the exit is taken, we have an up to date version of ITSTATE
1497 available. Without doing that, we have no hope of making precise
1498 exceptions work. */
1499 static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
1500 IRTemp old_itstate /* :: Ity_I32 */,
1501 IRTemp new_itstate /* :: Ity_I32 */
1504 ASSERT_IS_THUMB;
1505 put_ITSTATE(old_itstate); // backout
1506 IRTemp guards_for_next3 = newTemp(Ity_I32);
1507 assign(guards_for_next3,
1508 binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
1509 gen_SIGILL_T_if_nonzero(guards_for_next3);
1510 put_ITSTATE(new_itstate); //restore
1514 /* Simpler version of the above, which generates a SIGILL if
1515 we're anywhere within an IT block. */
1516 static void gen_SIGILL_T_if_in_ITBlock (
1517 IRTemp old_itstate /* :: Ity_I32 */,
1518 IRTemp new_itstate /* :: Ity_I32 */
1521 put_ITSTATE(old_itstate); // backout
1522 gen_SIGILL_T_if_nonzero(old_itstate);
1523 put_ITSTATE(new_itstate); //restore
1527 /* Generate an APSR value, from the NZCV thunk, and
1528 from QFLAG32 and GEFLAG0 .. GEFLAG3. */
1529 static IRTemp synthesise_APSR ( void )
1531 IRTemp res1 = newTemp(Ity_I32);
1532 // Get NZCV
1533 assign( res1, mk_armg_calculate_flags_nzcv() );
1534 // OR in the Q value
1535 IRTemp res2 = newTemp(Ity_I32);
1536 assign(
1537 res2,
1538 binop(Iop_Or32,
1539 mkexpr(res1),
1540 binop(Iop_Shl32,
1541 unop(Iop_1Uto32,
1542 binop(Iop_CmpNE32,
1543 mkexpr(get_QFLAG32()),
1544 mkU32(0))),
1545 mkU8(ARMG_CC_SHIFT_Q)))
1547 // OR in GE0 .. GE3
1548 IRExpr* ge0
1549 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
1550 IRExpr* ge1
1551 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
1552 IRExpr* ge2
1553 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
1554 IRExpr* ge3
1555 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
1556 IRTemp res3 = newTemp(Ity_I32);
1557 assign(res3,
1558 binop(Iop_Or32,
1559 mkexpr(res2),
1560 binop(Iop_Or32,
1561 binop(Iop_Or32,
1562 binop(Iop_Shl32, ge0, mkU8(16)),
1563 binop(Iop_Shl32, ge1, mkU8(17))),
1564 binop(Iop_Or32,
1565 binop(Iop_Shl32, ge2, mkU8(18)),
1566 binop(Iop_Shl32, ge3, mkU8(19))) )));
1567 return res3;
1571 /* and the inverse transformation: given an APSR value,
1572 set the NZCV thunk, the Q flag, and the GE flags. */
1573 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
1574 IRTemp apsrT, IRTemp condT )
1576 vassert(write_nzcvq || write_ge);
1577 if (write_nzcvq) {
1578 // Do NZCV
1579 IRTemp immT = newTemp(Ity_I32);
1580 assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
1581 setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
1582 // Do Q
1583 IRTemp qnewT = newTemp(Ity_I32);
1584 assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
1585 put_QFLAG32(qnewT, condT);
1587 if (write_ge) {
1588 // Do GE3..0
1589 put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
1590 condT);
1591 put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
1592 condT);
1593 put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
1594 condT);
1595 put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
1596 condT);
1601 /*------------------------------------------------------------*/
1602 /*--- Helpers for saturation ---*/
1603 /*------------------------------------------------------------*/
1605 /* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
1606 (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
1607 (b) the floor is computed from the value of imm5. these two fnsn
1608 should be commoned up. */
1610 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
1611 Optionally return flag resQ saying whether saturation occurred.
1612 See definition in manual, section A2.2.1, page 41
1613 (bits(N), boolean) UnsignedSatQ( integer i, integer N )
1615 if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
1616 elsif ( i < 0 ) { result = 0; saturated = TRUE; }
1617 else { result = i; saturated = FALSE; }
1618 return ( result<N-1:0>, saturated );
1621 static void armUnsignedSatQ( IRTemp* res, /* OUT - Ity_I32 */
1622 IRTemp* resQ, /* OUT - Ity_I32 */
1623 IRTemp regT, /* value to clamp - Ity_I32 */
1624 UInt imm5 ) /* saturation ceiling */
1626 ULong ceil64 = (1ULL << imm5) - 1; // (2^imm5)-1
1627 UInt ceil = (UInt)ceil64;
1628 UInt floor = 0;
1630 IRTemp nd0 = newTemp(Ity_I32);
1631 IRTemp nd1 = newTemp(Ity_I32);
1632 IRTemp nd2 = newTemp(Ity_I1);
1633 IRTemp nd3 = newTemp(Ity_I32);
1634 IRTemp nd4 = newTemp(Ity_I32);
1635 IRTemp nd5 = newTemp(Ity_I1);
1636 IRTemp nd6 = newTemp(Ity_I32);
1638 assign( nd0, mkexpr(regT) );
1639 assign( nd1, mkU32(ceil) );
1640 assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1641 assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
1642 assign( nd4, mkU32(floor) );
1643 assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1644 assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
1645 assign( *res, mkexpr(nd6) );
1647 /* if saturation occurred, then resQ is set to some nonzero value
1648 if sat did not occur, resQ is guaranteed to be zero. */
1649 if (resQ) {
1650 assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1655 /* SignedSatQ(): 'clamp' each value so it lies between -2^(N-1) <= x <= 2^(N-1) - 1
1656 Optionally return flag resQ saying whether saturation occurred.
1657 - see definition in manual, section A2.2.1, page 41
1658 (bits(N), boolean ) SignedSatQ( integer i, integer N )
1660 if ( i > 2^(N-1) - 1 ) { result = 2^(N-1) - 1; saturated = TRUE; }
1661 elsif ( i < -(2^(N-1)) ) { result = -(2^(N-1)); saturated = TRUE; }
1662 else { result = i; saturated = FALSE; }
1663 return ( result[N-1:0], saturated );
1666 static void armSignedSatQ( IRTemp regT, /* value to clamp - Ity_I32 */
1667 UInt imm5, /* saturation ceiling */
1668 IRTemp* res, /* OUT - Ity_I32 */
1669 IRTemp* resQ ) /* OUT - Ity_I32 */
/* NOTE(review): imm5 is presumably >= 1 — (imm5-1) is used as a
   shift count below, which would be UB for imm5 == 0; confirm that
   all callers guarantee this. */
1671 Long ceil64 = (1LL << (imm5-1)) - 1; // (2^(imm5-1))-1
1672 Long floor64 = -(1LL << (imm5-1)); // -(2^(imm5-1))
1673 Int ceil = (Int)ceil64;
1674 Int floor = (Int)floor64;
1676 IRTemp nd0 = newTemp(Ity_I32);
1677 IRTemp nd1 = newTemp(Ity_I32);
1678 IRTemp nd2 = newTemp(Ity_I1);
1679 IRTemp nd3 = newTemp(Ity_I32);
1680 IRTemp nd4 = newTemp(Ity_I32);
1681 IRTemp nd5 = newTemp(Ity_I1);
1682 IRTemp nd6 = newTemp(Ity_I32);
/* Clamp against the ceiling first (nd3), then the floor (nd6),
   using signed comparisons. */
1684 assign( nd0, mkexpr(regT) );
1685 assign( nd1, mkU32(ceil) );
1686 assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1687 assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
1688 assign( nd4, mkU32(floor) );
1689 assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1690 assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
1691 assign( *res, mkexpr(nd6) );
1693 /* if saturation occurred, then resQ is set to some nonzero value
1694 if sat did not occur, resQ is guaranteed to be zero. */
1695 if (resQ) {
1696 assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1701 /* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
1702 overflow occurred for 32-bit addition. Needs both args and the
1703 result. HD p27. */
1704 static
1705 IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
1706 IRTemp argL, IRTemp argR )
1708 IRTemp res = newTemp(Ity_I32);
1709 assign(res, resE);
1710 return
1711 binop( Iop_Shr32,
1712 binop( Iop_And32,
1713 binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
1714 binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
1715 mkU8(31) );
1718 /* Similarly .. also from HD p27 .. */
1719 static
1720 IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
1721 IRTemp argL, IRTemp argR )
1723 IRTemp res = newTemp(Ity_I32);
1724 assign(res, resE);
1725 return
1726 binop( Iop_Shr32,
1727 binop( Iop_And32,
1728 binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
1729 binop( Iop_Xor32, mkexpr(res), mkexpr(argL) )),
1730 mkU8(31) );
1734 /*------------------------------------------------------------*/
1735 /*--- Larger helpers ---*/
1736 /*------------------------------------------------------------*/
1738 /* Compute both the result and new C flag value for a LSL by an imm5
1739 or by a register operand. May generate reads of the old C value
1740 (hence only safe to use before any writes to guest state happen).
1741 Are factored out so can be used by both ARM and Thumb.
1743 Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
1744 "res" (the result) is a.k.a. "shop", shifter operand
1745 "newC" (the new C) is a.k.a. "shco", shifter carry out
1747 The calling convention for res and newC is a bit funny. They could
1748 be passed by value, but instead are passed by ref.
1750 The C (shco) value computed must be zero in bits 31:1, as the IR
1751 optimisations for flag handling (guest_arm_spechelper) rely on
1752 that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
1753 for it. Same applies to all these functions that compute shco
1754 after a shift or rotate, not just this one.
1757 static void compute_result_and_C_after_LSL_by_imm5 (
1758 /*OUT*/HChar* buf,
1759 IRTemp* res,
1760 IRTemp* newC,
1761 IRTemp rMt, UInt shift_amt, /* operands */
1762 UInt rM /* only for debug printing */
1765 if (shift_amt == 0) {
1766 if (newC) {
1767 assign( *newC, mk_armg_calculate_flag_c() );
1769 assign( *res, mkexpr(rMt) );
1770 DIS(buf, "r%u", rM);
1771 } else {
1772 vassert(shift_amt >= 1 && shift_amt <= 31);
1773 if (newC) {
1774 assign( *newC,
1775 binop(Iop_And32,
1776 binop(Iop_Shr32, mkexpr(rMt),
1777 mkU8(32 - shift_amt)),
1778 mkU32(1)));
1780 assign( *res,
1781 binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
1782 DIS(buf, "r%u, LSL #%u", rM, shift_amt);
/* Register-amount LSL: result and shifter-carry-out for Rm LSL Rs.
   Reads the old C flag, so only safe before any guest-state PUTs. */
1787 static void compute_result_and_C_after_LSL_by_reg (
1788 /*OUT*/HChar* buf,
1789 IRTemp* res,
1790 IRTemp* newC,
1791 IRTemp rMt, IRTemp rSt, /* operands */
1792 UInt rM, UInt rS /* only for debug printing */
1795 // shift left in range 0 .. 255
1796 // amt = rS & 255
1797 // res = amt < 32 ? Rm << amt : 0
1798 // newC = amt == 0 ? oldC :
1799 // amt in 1..32 ? Rm[32-amt] : 0
1800 IRTemp amtT = newTemp(Ity_I32);
1801 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1802 if (newC) {
1803 /* mux0X(amt == 0,
1804 mux0X(amt < 32,
1806 Rm[(32-amt) & 31]),
1807 oldC)
1809 /* About the best you can do is pray that iropt is able
1810 to nuke most or all of the following junk. */
1811 IRTemp oldC = newTemp(Ity_I32);
1812 assign(oldC, mk_armg_calculate_flag_c() );
/* Outer ITE: amt==0 keeps oldC.  Inner ITE: CmpLE32U..32 covers
   amt in 1..32 (Rm[(32-amt) & 31]); amt > 32 gives 0. */
1813 assign(
1814 *newC,
1815 IRExpr_ITE(
1816 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1817 mkexpr(oldC),
1818 IRExpr_ITE(
1819 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1820 binop(Iop_And32,
1821 binop(Iop_Shr32,
1822 mkexpr(rMt),
1823 unop(Iop_32to8,
1824 binop(Iop_And32,
1825 binop(Iop_Sub32,
1826 mkU32(32),
1827 mkexpr(amtT)),
1828 mkU32(31)
1832 mkU32(1)
1834 mkU32(0)
1839 // (Rm << (Rs & 31)) & (((Rs & 255) - 32) >>s 31)
1840 // Lhs of the & limits the shift to 31 bits, so as to
1841 // give known IR semantics. Rhs of the & is all 1s for
1842 // Rs <= 31 and all 0s for Rs >= 32.
1843 assign(
1844 *res,
1845 binop(
1846 Iop_And32,
1847 binop(Iop_Shl32,
1848 mkexpr(rMt),
1849 unop(Iop_32to8,
1850 binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1851 binop(Iop_Sar32,
1852 binop(Iop_Sub32,
1853 mkexpr(amtT),
1854 mkU32(32)),
1855 mkU8(31))));
1856 DIS(buf, "r%u, LSL r%u", rM, rS);
1860 static void compute_result_and_C_after_LSR_by_imm5 (
1861 /*OUT*/HChar* buf,
1862 IRTemp* res,
1863 IRTemp* newC,
1864 IRTemp rMt, UInt shift_amt, /* operands */
1865 UInt rM /* only for debug printing */
1868 if (shift_amt == 0) {
1869 // conceptually a 32-bit shift, however:
1870 // res = 0
1871 // newC = Rm[31]
1872 if (newC) {
1873 assign( *newC,
1874 binop(Iop_And32,
1875 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1876 mkU32(1)));
1878 assign( *res, mkU32(0) );
1879 DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
1880 } else {
1881 // shift in range 1..31
1882 // res = Rm >>u shift_amt
1883 // newC = Rm[shift_amt - 1]
1884 vassert(shift_amt >= 1 && shift_amt <= 31);
1885 if (newC) {
1886 assign( *newC,
1887 binop(Iop_And32,
1888 binop(Iop_Shr32, mkexpr(rMt),
1889 mkU8(shift_amt - 1)),
1890 mkU32(1)));
1892 assign( *res,
1893 binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
1894 DIS(buf, "r%u, LSR #%u", rM, shift_amt);
/* Register-amount LSR: result and shifter-carry-out for Rm LSR Rs.
   Reads the old C flag, so only safe before any guest-state PUTs. */
1899 static void compute_result_and_C_after_LSR_by_reg (
1900 /*OUT*/HChar* buf,
1901 IRTemp* res,
1902 IRTemp* newC,
1903 IRTemp rMt, IRTemp rSt, /* operands */
1904 UInt rM, UInt rS /* only for debug printing */
1907 // shift right in range 0 .. 255
1908 // amt = rS & 255
1909 // res = amt < 32 ? Rm >>u amt : 0
1910 // newC = amt == 0 ? oldC :
1911 // amt in 1..32 ? Rm[amt-1] : 0
1912 IRTemp amtT = newTemp(Ity_I32);
1913 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1914 if (newC) {
1915 /* mux0X(amt == 0,
1916 mux0X(amt < 32,
1918 Rm[(amt-1) & 31]),
1919 oldC)
/* Outer ITE: amt==0 keeps oldC.  Inner ITE: CmpLE32U..32 covers
   amt in 1..32 (Rm[(amt-1) & 31]); amt > 32 gives 0. */
1921 IRTemp oldC = newTemp(Ity_I32);
1922 assign(oldC, mk_armg_calculate_flag_c() );
1923 assign(
1924 *newC,
1925 IRExpr_ITE(
1926 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1927 mkexpr(oldC),
1928 IRExpr_ITE(
1929 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1930 binop(Iop_And32,
1931 binop(Iop_Shr32,
1932 mkexpr(rMt),
1933 unop(Iop_32to8,
1934 binop(Iop_And32,
1935 binop(Iop_Sub32,
1936 mkexpr(amtT),
1937 mkU32(1)),
1938 mkU32(31)
1942 mkU32(1)
1944 mkU32(0)
1949 // (Rm >>u (Rs & 31)) & (((Rs & 255) - 32) >>s 31)
1950 // Lhs of the & limits the shift to 31 bits, so as to
1951 // give known IR semantics. Rhs of the & is all 1s for
1952 // Rs <= 31 and all 0s for Rs >= 32.
1953 assign(
1954 *res,
1955 binop(
1956 Iop_And32,
1957 binop(Iop_Shr32,
1958 mkexpr(rMt),
1959 unop(Iop_32to8,
1960 binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1961 binop(Iop_Sar32,
1962 binop(Iop_Sub32,
1963 mkexpr(amtT),
1964 mkU32(32)),
1965 mkU8(31))));
1966 DIS(buf, "r%u, LSR r%u", rM, rS);
1970 static void compute_result_and_C_after_ASR_by_imm5 (
1971 /*OUT*/HChar* buf,
1972 IRTemp* res,
1973 IRTemp* newC,
1974 IRTemp rMt, UInt shift_amt, /* operands */
1975 UInt rM /* only for debug printing */
1978 if (shift_amt == 0) {
1979 // conceptually a 32-bit shift, however:
1980 // res = Rm >>s 31
1981 // newC = Rm[31]
1982 if (newC) {
1983 assign( *newC,
1984 binop(Iop_And32,
1985 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1986 mkU32(1)));
1988 assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
1989 DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
1990 } else {
1991 // shift in range 1..31
1992 // res = Rm >>s shift_amt
1993 // newC = Rm[shift_amt - 1]
1994 vassert(shift_amt >= 1 && shift_amt <= 31);
1995 if (newC) {
1996 assign( *newC,
1997 binop(Iop_And32,
1998 binop(Iop_Shr32, mkexpr(rMt),
1999 mkU8(shift_amt - 1)),
2000 mkU32(1)));
2002 assign( *res,
2003 binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
2004 DIS(buf, "r%u, ASR #%u", rM, shift_amt);
/* Register-amount ASR: result and shifter-carry-out for Rm ASR Rs.
   Reads the old C flag, so only safe before any guest-state PUTs. */
2009 static void compute_result_and_C_after_ASR_by_reg (
2010 /*OUT*/HChar* buf,
2011 IRTemp* res,
2012 IRTemp* newC,
2013 IRTemp rMt, IRTemp rSt, /* operands */
2014 UInt rM, UInt rS /* only for debug printing */
2017 // arithmetic shift right in range 0 .. 255
2018 // amt = rS & 255
2019 // res = amt < 32 ? Rm >>s amt : Rm >>s 31
2020 // newC = amt == 0 ? oldC :
2021 // amt in 1..32 ? Rm[amt-1] : Rm[31]
2022 IRTemp amtT = newTemp(Ity_I32);
2023 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2024 if (newC) {
2025 /* mux0X(amt == 0,
2026 mux0X(amt < 32,
2027 Rm[31],
2028 Rm[(amt-1) & 31])
2029 oldC)
/* Outer ITE: amt==0 keeps oldC.  Inner ITE: amt in 1..32 yields
   Rm[(amt-1) & 31]; amt > 32 yields the sign bit Rm[31]. */
2031 IRTemp oldC = newTemp(Ity_I32);
2032 assign(oldC, mk_armg_calculate_flag_c() );
2033 assign(
2034 *newC,
2035 IRExpr_ITE(
2036 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
2037 mkexpr(oldC),
2038 IRExpr_ITE(
2039 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
2040 binop(Iop_And32,
2041 binop(Iop_Shr32,
2042 mkexpr(rMt),
2043 unop(Iop_32to8,
2044 binop(Iop_And32,
2045 binop(Iop_Sub32,
2046 mkexpr(amtT),
2047 mkU32(1)),
2048 mkU32(31)
2052 mkU32(1)
2054 binop(Iop_And32,
2055 binop(Iop_Shr32,
2056 mkexpr(rMt),
2057 mkU8(31)
2059 mkU32(1)
2065 // (Rm >>s (amt <u 32 ? amt : 31))
2066 assign(
2067 *res,
2068 binop(
2069 Iop_Sar32,
2070 mkexpr(rMt),
2071 unop(
2072 Iop_32to8,
2073 IRExpr_ITE(
2074 binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
2075 mkexpr(amtT),
2076 mkU32(31)))));
2077 DIS(buf, "r%u, ASR r%u", rM, rS);
/* Compute the result, and optionally the carry-out, of a ROR
   (rotate right) whose rotate amount is taken from a register, as
   for ARM/Thumb register-shifted shifter operands.  newC may be
   NULL if the carry-out is not required. */
static void compute_result_and_C_after_ROR_by_reg (
               /*OUT*/HChar* buf,
               IRTemp* res,
               IRTemp* newC,
               IRTemp rMt, IRTemp rSt, /* operands */
               UInt rM, UInt rS /* only for debug printing */
   // rotate right in range 0 .. 255
   // amt = rS & 255
   // shop = Rm `ror` (amt & 31)
   // shco = amt == 0 ? oldC : Rm[(amt-1) & 31]
   IRTemp amtT = newTemp(Ity_I32);
   assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
   IRTemp amt5T = newTemp(Ity_I32);
   assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
   IRTemp oldC = newTemp(Ity_I32);
   assign(oldC, mk_armg_calculate_flag_c() );
   if (newC) {
      assign(
         *newC,
         IRExpr_ITE(
            binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
            /* amt != 0: carry out is Rm[(amt-1) & 31] */
            binop(Iop_And32,
                  binop(Iop_Shr32,
                        mkexpr(rMt),
                        unop(Iop_32to8,
                             binop(Iop_And32,
                                   binop(Iop_Sub32,
                                         mkexpr(amtT),
                                         mkU32(1)
                                   mkU32(31)
                  mkU32(1)
            /* amt == 0: the carry flag is unchanged */
            mkexpr(oldC)
   assign(
      *res,
      IRExpr_ITE(
         binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
         /* (amt & 31) != 0: (Rm >>u amt5) | (Rm << (32-amt5)) */
         binop(Iop_Or32,
               binop(Iop_Shr32,
                     mkexpr(rMt),
                     unop(Iop_32to8, mkexpr(amt5T))
               binop(Iop_Shl32,
                     mkexpr(rMt),
                     unop(Iop_32to8,
                          binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
         /* rotate by a multiple of 32: Rm is unchanged */
         mkexpr(rMt)
   /* NOTE(review): this prints e.g. "ROR r#3" whereas the other
      by-reg cases print e.g. "ASR r3" -- looks like a stray '#';
      verify against upstream before changing. */
   DIS(buf, "r%u, ROR r#%u", rM, rS);
2146 /* Generate an expression corresponding to the immediate-shift case of
2147 a shifter operand. This is used both for ARM and Thumb2.
2149 Bind it to a temporary, and return that via *res. If newC is
2150 non-NULL, also compute a value for the shifter's carry out (in the
2151 LSB of a word), bind it to a temporary, and return that via *shco.
2153 Generates GETs from the guest state and is therefore not safe to
2154 use once we start doing PUTs to it, for any given instruction.
2156 'how' is encoded thusly:
2157 00b LSL, 01b LSR, 10b ASR, 11b ROR
2158 Most but not all ARM and Thumb integer insns use this encoding.
2159 Be careful to ensure the right value is passed here.
static void compute_result_and_C_after_shift_by_imm5 (
               /*OUT*/HChar* buf,
               /*OUT*/IRTemp* res,
               /*OUT*/IRTemp* newC,
               IRTemp rMt, /* reg to shift */
               UInt how, /* what kind of shift */
               UInt shift_amt, /* shift amount (0..31) */
               UInt rM /* only for debug printing */
   vassert(shift_amt < 32);
   vassert(how < 4);
   /* Dispatch on the 2-bit shift kind; see the header comment above
      for the encoding (00b LSL, 01b LSR, 10b ASR, 11b ROR). */
   switch (how) {
      case 0:
         compute_result_and_C_after_LSL_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         break;
      case 1:
         compute_result_and_C_after_LSR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         break;
      case 2:
         compute_result_and_C_after_ASR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         break;
      case 3:
         /* ROR with a zero immediate encodes RRX, not a rotate by
            zero. */
         if (shift_amt == 0) {
            IRTemp oldcT = newTemp(Ity_I32);
            // rotate right 1 bit through carry (?)
            // RRX -- described at ARM ARM A5-17
            // res = (oldC << 31) | (Rm >>u 1)
            // newC = Rm[0]
            if (newC) {
               assign( *newC,
                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
            assign( oldcT, mk_armg_calculate_flag_c() );
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
            DIS(buf, "r%u, RRX", rM);
         } else {
            // rotate right in range 1..31
            // res = Rm `ror` shift_amt
            // newC = Rm[shift_amt - 1]
            vassert(shift_amt >= 1 && shift_amt <= 31);
            if (newC) {
               assign( *newC,
                       binop(Iop_And32,
                             binop(Iop_Shr32, mkexpr(rMt),
                                   mkU8(shift_amt - 1)),
                             mkU32(1)));
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
                          binop(Iop_Shl32, mkexpr(rMt),
                                mkU8(32-shift_amt))));
            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
         break;
      default:
         /*NOTREACHED*/
         vassert(0);
2239 /* Generate an expression corresponding to the register-shift case of
2240 a shifter operand. This is used both for ARM and Thumb2.
2242 Bind it to a temporary, and return that via *res. If newC is
2243 non-NULL, also compute a value for the shifter's carry out (in the
2244 LSB of a word), bind it to a temporary, and return that via *shco.
2246 Generates GETs from the guest state and is therefore not safe to
2247 use once we start doing PUTs to it, for any given instruction.
2249 'how' is encoded thusly:
2250 00b LSL, 01b LSR, 10b ASR, 11b ROR
2251 Most but not all ARM and Thumb integer insns use this encoding.
2252 Be careful to ensure the right value is passed here.
2254 static void compute_result_and_C_after_shift_by_reg (
2255 /*OUT*/HChar* buf,
2256 /*OUT*/IRTemp* res,
2257 /*OUT*/IRTemp* newC,
2258 IRTemp rMt, /* reg to shift */
2259 UInt how, /* what kind of shift */
2260 IRTemp rSt, /* shift amount */
2261 UInt rM, /* only for debug printing */
2262 UInt rS /* only for debug printing */
2265 vassert(how < 4);
2266 switch (how) {
2267 case 0: { /* LSL */
2268 compute_result_and_C_after_LSL_by_reg(
2269 buf, res, newC, rMt, rSt, rM, rS
2271 break;
2273 case 1: { /* LSR */
2274 compute_result_and_C_after_LSR_by_reg(
2275 buf, res, newC, rMt, rSt, rM, rS
2277 break;
2279 case 2: { /* ASR */
2280 compute_result_and_C_after_ASR_by_reg(
2281 buf, res, newC, rMt, rSt, rM, rS
2283 break;
2285 case 3: { /* ROR */
2286 compute_result_and_C_after_ROR_by_reg(
2287 buf, res, newC, rMt, rSt, rM, rS
2289 break;
2291 default:
2292 /*NOTREACHED*/
2293 vassert(0);
2298 /* Generate an expression corresponding to a shifter_operand, bind it
2299 to a temporary, and return that via *shop. If shco is non-NULL,
2300 also compute a value for the shifter's carry out (in the LSB of a
2301 word), bind it to a temporary, and return that via *shco.
2303 If for some reason we can't come up with a shifter operand (missing
2304 case? not really a shifter operand?) return False.
2306 Generates GETs from the guest state and is therefore not safe to
2307 use once we start doing PUTs to it, for any given instruction.
2309 For ARM insns only; not for Thumb.
static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
                                 /*OUT*/IRTemp* shop,
                                 /*OUT*/IRTemp* shco,
                                 /*OUT*/HChar* buf )
   UInt insn_4 = (insn_11_0 >> 4) & 1;
   UInt insn_7 = (insn_11_0 >> 7) & 1;
   vassert(insn_25 <= 0x1);
   vassert(insn_11_0 <= 0xFFF);
   /* Caller must hand in fresh (unassigned) temporaries. */
   vassert(shop && *shop == IRTemp_INVALID);
   *shop = newTemp(Ity_I32);
   if (shco) {
      vassert(*shco == IRTemp_INVALID);
      *shco = newTemp(Ity_I32);
   /* 32-bit immediate */
   if (insn_25 == 1) {
      /* immediate: (7:0) rotated right by 2 * (11:8) */
      UInt imm = (insn_11_0 >> 0) & 0xFF;
      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
      vassert(rot <= 30);
      imm = ROR32(imm, rot);
      if (shco) {
         /* Carry-out: the existing C flag when there is no rotation,
            otherwise bit 31 of the rotated immediate. */
         if (rot == 0) {
            assign( *shco, mk_armg_calculate_flag_c() );
         } else {
            assign( *shco, mkU32( (imm >> 31) & 1 ) );
      DIS(buf, "#0x%x", imm);
      assign( *shop, mkU32(imm) );
      return True;
   /* Shift/rotate by immediate */
   if (insn_25 == 0 && insn_4 == 0) {
      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
      UInt rM = (insn_11_0 >> 0) & 0xF;
      UInt how = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      assign(rMt, getIRegA(rM));
      vassert(shift_amt <= 31);
      compute_result_and_C_after_shift_by_imm5(
         buf, shop, shco, rMt, how, shift_amt, rM
      return True;
   /* Shift/rotate by register */
   if (insn_25 == 0 && insn_4 == 1) {
      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
      UInt rM = (insn_11_0 >> 0) & 0xF;
      UInt rS = (insn_11_0 >> 8) & 0xF;
      UInt how = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      IRTemp rSt = newTemp(Ity_I32);
      /* Bit 7 must be zero for a register-shifted operand; reject
         otherwise. */
      if (insn_7 == 1)
         return False; /* not really a shifter operand */
      assign(rMt, getIRegA(rM));
      assign(rSt, getIRegA(rS));
      compute_result_and_C_after_shift_by_reg(
         buf, shop, shco, rMt, how, rSt, rM, rS
      return True;
   /* No decodable shifter operand: complain and fail. */
   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   return False;
2395 /* ARM only */
2396 static
2397 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2398 /*OUT*/HChar* buf )
2400 vassert(rN < 16);
2401 vassert(bU < 2);
2402 vassert(imm12 < 0x1000);
2403 HChar opChar = bU == 1 ? '+' : '-';
2404 DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2405 return
2406 binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2407 getIRegA(rN),
2408 mkU32(imm12) );
/* ARM only.
   NB: This is "DecodeImmShift" in newer versions of the ARM ARM.
   Effective address of the form rN +/- (rM shifted by sh2/imm5);
   also writes a disassembly fragment to *buf. */
static
IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
                                          UInt sh2, UInt imm5,
                                          /*OUT*/HChar* buf )
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(rM < 16);
   vassert(sh2 < 4);
   vassert(imm5 < 32);
   HChar opChar = bU == 1 ? '+' : '-';
   IRExpr* index = NULL;
   switch (sh2) {
      case 0: /* LSL */
         /* imm5 can be in the range 0 .. 31 inclusive. */
         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
         break;
      case 1: /* LSR */
         if (imm5 == 0) {
            /* imm5 == 0 encodes LSR #32 (printed as such below);
               the result is zero. */
            index = mkU32(0);
            vassert(0); // ATC
         } else {
            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
         DIS(buf, "[r%u, %cr%u, LSR #%u]",
             rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 2: /* ASR */
         /* Doesn't this just mean that the behaviour with imm5 == 0
            is the same as if it had been 31 ? */
         if (imm5 == 0) {
            /* imm5 == 0 encodes ASR #32: every bit becomes the sign
               bit. */
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
            vassert(0); // ATC
         } else {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
         DIS(buf, "[r%u, %cr%u, ASR #%u]",
             rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 3: /* ROR or RRX */
         if (imm5 == 0) {
            /* ROR #0 encodes RRX: rotate right one bit through the
               carry flag. */
            IRTemp rmT = newTemp(Ity_I32);
            IRTemp cflagT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            assign(cflagT, mk_armg_calculate_flag_c());
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
         } else {
            IRTemp rmT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            vassert(imm5 >= 1 && imm5 <= 31);
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
         break;
      default:
         vassert(0);
   vassert(index);
   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                getIRegA(rN), index);
2484 /* ARM only */
2485 static
2486 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2487 /*OUT*/HChar* buf )
2489 vassert(rN < 16);
2490 vassert(bU < 2);
2491 vassert(imm8 < 0x100);
2492 HChar opChar = bU == 1 ? '+' : '-';
2493 DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2494 return
2495 binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2496 getIRegA(rN),
2497 mkU32(imm8) );
2501 /* ARM only */
2502 static
2503 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2504 /*OUT*/HChar* buf )
2506 vassert(rN < 16);
2507 vassert(bU < 2);
2508 vassert(rM < 16);
2509 HChar opChar = bU == 1 ? '+' : '-';
2510 IRExpr* index = getIRegA(rM);
2511 DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2512 return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2513 getIRegA(rN), index);
/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result. Generate code to convert it to an
   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
   IRTemp ix = newTemp(Ity_I32);
   IRTemp termL = newTemp(Ity_I32);
   IRTemp termR = newTemp(Ity_I32);
   IRTemp nzcv = newTemp(Ity_I32);
   /* This is where the fun starts. We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group. The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
   FP cmp result | IR | ARM(nzcv)
   --------------------------------
   UN 0x45 0011
   LT 0x01 1000
   GT 0x00 0010
   EQ 0x40 0110
   */
   /* Now since you're probably wondering WTF ..
      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.
      termL is a sequence cooked up by GNU superopt. It converts ix
      into an almost correct value NZCV value (incredibly), except
      for the case of UN, where it produces 0100 instead of the
      required 0011.
      termR is therefore a correction term, also computed from ix. It
      is 1 in the UN case and 0 for LT, GT and UN. Hence, to get
      the final correct value, we subtract termR from termL.
      Don't take my word for it. There's a test program at the bottom
      of this file, to try this out with.
   */
   /* NOTE(review): the destination temporary of this assign ('ix')
      appears to have been lost in this copy of the file -- the Or32
      below builds the 2-bit index described above.  Verify against
      upstream. */
   assign(
      binop(Iop_Or32,
            binop(Iop_And32,
                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
                  mkU32(3)),
            binop(Iop_And32, mkexpr(irRes), mkU32(1))));
   assign(
      termL,
      binop(Iop_Add32,
            binop(Iop_Shr32,
                  binop(Iop_Sub32,
                        binop(Iop_Shl32,
                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
                              mkU8(30)),
                        mkU32(1)),
                  mkU8(29)),
            mkU32(1)));
   assign(
      termR,
      binop(Iop_And32,
            binop(Iop_And32,
                  mkexpr(ix),
                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
            mkU32(1)));
   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   return nzcv;
2592 /* Thumb32 only. This is "ThumbExpandImm" in the ARM ARM. If
2593 updatesC is non-NULL, a boolean is written to it indicating whether
2594 or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
static UInt thumbExpandImm ( Bool* updatesC,
                             UInt imm1, UInt imm3, UInt imm8 )
{
   vassert(imm1 < (1<<1));
   vassert(imm3 < (1<<3));
   vassert(imm8 < (1<<8));
   /* Selector i:imm3:a is 5 bits, hence always in 0 .. 31. */
   UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
   UInt abcdefgh = imm8;
   UInt lbcdefgh = imm8 | 0x80;
   if (updatesC) {
      /* Only the rotated-byte forms (selector >= 8) update C. */
      *updatesC = i_imm3_a >= 8;
   }
   if (i_imm3_a >= 8) {
      /* Rotated-byte form: imm8 with its top bit forced on, rotated
         right by i_imm3_a.  Since lbcdefgh fits in 8 bits and the
         rotate amount is >= 8, the rotate reduces to this shift. */
      return lbcdefgh << (32 - i_imm3_a);
   }
   /* Replicated forms: selector pairs 0/1, 2/3, 4/5, 6/7. */
   switch (i_imm3_a >> 1) {
      case 0: /* 000000xx: imm8 itself */
         return abcdefgh;
      case 1: /* 00XY00XY */
         return (abcdefgh << 16) | abcdefgh;
      case 2: /* XY00XY00 */
         return (abcdefgh << 24) | (abcdefgh << 8);
      case 3: /* XYXYXYXY */
         return (abcdefgh << 24) | (abcdefgh << 16)
                | (abcdefgh << 8) | abcdefgh;
      default:
         break;
   }
   /*NOTREACHED*/
   vassert(0);
}
/* Version of thumbExpandImm where we simply feed it the
   instruction halfwords (the lowest addressed one is I0). */
static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
                                        UShort i0s, UShort i1s )
   UInt i0 = (UInt)i0s;
   UInt i1 = (UInt)i1s;
   /* imm1: bit 10 of the first halfword. */
   UInt imm1 = SLICE_UInt(i0,10,10);
   /* imm3: bits 14:12 of the second halfword. */
   UInt imm3 = SLICE_UInt(i1,14,12);
   /* imm8: bits 7:0 of the second halfword. */
   UInt imm8 = SLICE_UInt(i1,7,0);
   return thumbExpandImm(updatesC, imm1, imm3, imm8);
2641 /* Thumb16 only. Given the firstcond and mask fields from an IT
2642 instruction, compute the 32-bit ITSTATE value implied, as described
2643 in libvex_guest_arm.h. This is not the ARM ARM representation.
2644 Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
2645 disassembly printing. Returns False if firstcond or mask
2646 denote something invalid.
2648 The number and conditions for the instructions to be
2649 conditionalised depend on firstcond and mask:
2651 mask cond 1 cond 2 cond 3 cond 4
2653 1000 fc[3:0]
2654 x100 fc[3:0] fc[3:1]:x
2655 xy10 fc[3:0] fc[3:1]:x fc[3:1]:y
2656 xyz1 fc[3:0] fc[3:1]:x fc[3:1]:y fc[3:1]:z
2658 The condition fields are assembled in *itstate backwards (cond 4 at
2659 the top, cond 1 at the bottom). Conditions are << 4'd and then
2660 ^0xE'd, and those fields that correspond to instructions in the IT
2661 block are tagged with a 1 bit.
2663 static Bool compute_ITSTATE ( /*OUT*/UInt* itstate,
2664 /*OUT*/HChar* ch1,
2665 /*OUT*/HChar* ch2,
2666 /*OUT*/HChar* ch3,
2667 UInt firstcond, UInt mask )
2669 vassert(firstcond <= 0xF);
2670 vassert(mask <= 0xF);
2671 *itstate = 0;
2672 *ch1 = *ch2 = *ch3 = '.';
2673 if (mask == 0)
2674 return False; /* the logic below actually ensures this anyway,
2675 but clearer to make it explicit. */
2676 if (firstcond == 0xF)
2677 return False; /* NV is not allowed */
2678 if (firstcond == 0xE && popcount32(mask) != 1)
2679 return False; /* if firstcond is AL then all the rest must be too */
2681 UInt m3 = (mask >> 3) & 1;
2682 UInt m2 = (mask >> 2) & 1;
2683 UInt m1 = (mask >> 1) & 1;
2684 UInt m0 = (mask >> 0) & 1;
2686 UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
2687 UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
2689 if (m3 == 1 && (m2|m1|m0) == 0) {
2690 *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
2691 *itstate ^= 0xE0E0E0E0;
2692 return True;
2695 if (m2 == 1 && (m1|m0) == 0) {
2696 *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
2697 *itstate ^= 0xE0E0E0E0;
2698 *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2699 return True;
2702 if (m1 == 1 && m0 == 0) {
2703 *itstate = (ni << 24)
2704 | (setbit32(fc, 4, m2) << 16)
2705 | (setbit32(fc, 4, m3) << 8) | fc;
2706 *itstate ^= 0xE0E0E0E0;
2707 *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2708 *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2709 return True;
2712 if (m0 == 1) {
2713 *itstate = (setbit32(fc, 4, m1) << 24)
2714 | (setbit32(fc, 4, m2) << 16)
2715 | (setbit32(fc, 4, m3) << 8) | fc;
2716 *itstate ^= 0xE0E0E0E0;
2717 *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2718 *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2719 *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
2720 return True;
2723 return False;
2727 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2728 Chapter 7 Section 1. */
2729 static IRTemp gen_BITREV ( IRTemp x0 )
2731 IRTemp x1 = newTemp(Ity_I32);
2732 IRTemp x2 = newTemp(Ity_I32);
2733 IRTemp x3 = newTemp(Ity_I32);
2734 IRTemp x4 = newTemp(Ity_I32);
2735 IRTemp x5 = newTemp(Ity_I32);
2736 UInt c1 = 0x55555555;
2737 UInt c2 = 0x33333333;
2738 UInt c3 = 0x0F0F0F0F;
2739 UInt c4 = 0x00FF00FF;
2740 UInt c5 = 0x0000FFFF;
2741 assign(x1,
2742 binop(Iop_Or32,
2743 binop(Iop_Shl32,
2744 binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2745 mkU8(1)),
2746 binop(Iop_Shr32,
2747 binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2748 mkU8(1))
2750 assign(x2,
2751 binop(Iop_Or32,
2752 binop(Iop_Shl32,
2753 binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2754 mkU8(2)),
2755 binop(Iop_Shr32,
2756 binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2757 mkU8(2))
2759 assign(x3,
2760 binop(Iop_Or32,
2761 binop(Iop_Shl32,
2762 binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2763 mkU8(4)),
2764 binop(Iop_Shr32,
2765 binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2766 mkU8(4))
2768 assign(x4,
2769 binop(Iop_Or32,
2770 binop(Iop_Shl32,
2771 binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2772 mkU8(8)),
2773 binop(Iop_Shr32,
2774 binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2775 mkU8(8))
2777 assign(x5,
2778 binop(Iop_Or32,
2779 binop(Iop_Shl32,
2780 binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2781 mkU8(16)),
2782 binop(Iop_Shr32,
2783 binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2784 mkU8(16))
2786 return x5;
2790 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2791 0:1:2:3 (aka byte-swap). */
2792 static IRTemp gen_REV ( IRTemp arg )
2794 IRTemp res = newTemp(Ity_I32);
2795 assign(res,
2796 binop(Iop_Or32,
2797 binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
2798 binop(Iop_Or32,
2799 binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2800 mkU32(0x00FF0000)),
2801 binop(Iop_Or32,
2802 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2803 mkU32(0x0000FF00)),
2804 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
2805 mkU32(0x000000FF) )
2806 ))));
2807 return res;
2811 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2812 2:3:0:1 (swap within lo and hi halves). */
2813 static IRTemp gen_REV16 ( IRTemp arg )
2815 IRTemp res = newTemp(Ity_I32);
2816 assign(res,
2817 binop(Iop_Or32,
2818 binop(Iop_And32,
2819 binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2820 mkU32(0xFF00FF00)),
2821 binop(Iop_And32,
2822 binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2823 mkU32(0x00FF00FF))));
2824 return res;
2828 /*------------------------------------------------------------*/
2829 /*--- Advanced SIMD (NEON) instructions ---*/
2830 /*------------------------------------------------------------*/
2832 /*------------------------------------------------------------*/
2833 /*--- NEON data processing ---*/
2834 /*------------------------------------------------------------*/
2836 /* For all NEON DP ops, we use the normal scheme to handle conditional
2837 writes to registers -- pass in condT and hand that on to the
2838 put*Reg functions. In ARM mode condT is always IRTemp_INVALID
2839 since NEON is unconditional for ARM. In Thumb mode condT is
2840 derived from the ITSTATE shift register in the normal way. */
static
UInt get_neon_d_regno(UInt theInstr)
{
   /* Raw register field: bit 22 (D) above bits 15:12 (Vd). */
   UInt regNo = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   if (theInstr & 0x40) {
      /* Quad form (bit 6 set): even encodings halve to a Q-register
         number; odd encodings are pushed out of range (>= 0x100) so
         callers can reject them. */
      regNo = (regNo & 1) ? regNo + 0x100 : regNo >> 1;
   }
   return regNo;
}
static
UInt get_neon_n_regno(UInt theInstr)
{
   /* Raw register field: bit 7 (N) above bits 19:16 (Vn). */
   UInt regNo = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
   if (theInstr & 0x40) {
      /* Quad form (bit 6 set): even encodings halve to a Q-register
         number; odd encodings are pushed out of range (>= 0x100) so
         callers can reject them. */
      regNo = (regNo & 1) ? regNo + 0x100 : regNo >> 1;
   }
   return regNo;
}
static
UInt get_neon_m_regno(UInt theInstr)
{
   /* Raw register field: bit 5 (M) above bits 3:0 (Vm). */
   UInt regNo = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   if (theInstr & 0x40) {
      /* Quad form (bit 6 set): even encodings halve to a Q-register
         number; odd encodings are pushed out of range (>= 0x100) so
         callers can reject them. */
      regNo = (regNo & 1) ? regNo + 0x100 : regNo >> 1;
   }
   return regNo;
}
/* VEXT: combine registers n and m via a Slice primop at byte
   offset imm4 (see the DIP format below). */
static
Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
   UInt dreg = get_neon_d_regno(theInstr);
   UInt mreg = get_neon_m_regno(theInstr);
   UInt nreg = get_neon_n_regno(theInstr);
   UInt imm4 = (theInstr >> 8) & 0xf;
   UInt Q = (theInstr >> 6) & 1;
   HChar reg_t = Q ? 'q' : 'd';
   if (Q) {
      /* 128-bit (quad) form */
      putQReg(dreg, triop(Iop_SliceV128, /*hiV128*/getQReg(mreg),
                          /*loV128*/getQReg(nreg), mkU8(imm4)), condT);
   } else {
      /* 64-bit (double) form */
      putDRegI64(dreg, triop(Iop_Slice64, /*hiI64*/getDRegI64(mreg),
                             /*loI64*/getDRegI64(nreg), mkU8(imm4)), condT);
   DIP("vext.8 %c%u, %c%u, %c%u, #%u\n", reg_t, dreg, reg_t, nreg,
       reg_t, mreg, imm4);
   return True;
/* Generate specific vector FP binary ops, possibly with a fake
   rounding mode as required by the primop. */
static
IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
   switch (op) {
      /* These are triops at the IR level: they take a rounding mode
         as their first argument, so supply a fake one. */
      case Iop_Add32Fx4:
      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
         return triop(op, get_FAKE_roundingmode(), argL, argR );
      /* These are plain binops; no rounding mode involved. */
      case Iop_Add32x4: case Iop_Add16x8:
      case Iop_Sub32x4: case Iop_Sub16x8:
      case Iop_Mul32x4: case Iop_Mul16x8:
      case Iop_Mul32x2: case Iop_Mul16x4:
      case Iop_Add32Fx2:
      case Iop_Sub32Fx2:
      case Iop_Mul32Fx2:
      case Iop_PwAdd32Fx2:
         return binop(op, argL, argR);
      default:
         /* Unhandled op: print it for diagnosis, then assert. */
         ppIROp(op);
         vassert(0);
/* VTBL, VTBX */
static
Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
   /* op == 0 is VTBL, op == 1 is VTBX (merges with existing dreg
      contents for out-of-range indices -- see the if (op) below). */
   UInt op = (theInstr >> 6) & 1;
   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   /* len+1 = number of table registers (d[nreg] .. d[nreg+len]). */
   UInt len = (theInstr >> 8) & 3;
   Int i;
   IROp cmp;
   ULong imm;
   IRTemp arg_l;
   IRTemp old_mask, new_mask, cur_mask;
   IRTemp old_res, new_res;
   IRTemp old_arg, new_arg;
   /* Quad-register encodings are not valid here. */
   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
      return False;
   if (nreg + len > 31)
      return False;
   cmp = Iop_CmpGT8Ux8;
   old_mask = newTemp(Ity_I64);
   old_res = newTemp(Ity_I64);
   old_arg = newTemp(Ity_I64);
   assign(old_mask, mkU64(0));
   assign(old_res, mkU64(0));
   assign(old_arg, getDRegI64(mreg));
   /* Build 0x0808080808080808: 8 is the per-register index span. */
   imm = 8;
   imm = (imm << 8) | imm;
   imm = (imm << 16) | imm;
   imm = (imm << 32) | imm;
   /* One pass per table register: lanes whose running index is < 8
      select from this register (via Perm8x8); the index is then
      decremented by 8 for the next register. */
   for (i = 0; i <= len; i++) {
      arg_l = newTemp(Ity_I64);
      new_mask = newTemp(Ity_I64);
      cur_mask = newTemp(Ity_I64);
      new_res = newTemp(Ity_I64);
      new_arg = newTemp(Ity_I64);
      assign(arg_l, getDRegI64(nreg+i));
      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
      assign(new_res, binop(Iop_Or64,
                            mkexpr(old_res),
                            binop(Iop_And64,
                                  binop(Iop_Perm8x8,
                                        mkexpr(arg_l),
                                        binop(Iop_And64,
                                              mkexpr(old_arg),
                                              mkexpr(cur_mask))),
                                  mkexpr(cur_mask))));
      old_arg = new_arg;
      old_mask = new_mask;
      old_res = new_res;
   if (op) {
      /* VTBX: lanes never matched by any table register keep the
         destination's previous contents. */
      new_res = newTemp(Ity_I64);
      assign(new_res, binop(Iop_Or64,
                            binop(Iop_And64,
                                  getDRegI64(dreg),
                                  unop(Iop_Not64, mkexpr(old_mask))),
                            mkexpr(old_res)));
      old_res = new_res;
   putDRegI64(dreg, mkexpr(old_res), condT);
   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   if (len > 0) {
      DIP("d%u-d%u", nreg, nreg + len);
   } else {
      DIP("d%u", nreg);
   DIP("}, d%u\n", mreg);
   return True;
/* VDUP (scalar) */
static
Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
   UInt Q = (theInstr >> 6) & 1;
   UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   UInt imm4 = (theInstr >> 16) & 0xF;
   UInt index;
   UInt size;
   IRTemp arg_m;
   IRTemp res;
   IROp op, op2;
   /* imm4 values 0000 and 1000 select no element size; reject. */
   if ((imm4 == 0) || (imm4 == 8))
      return False;
   /* The quad form needs an even dreg encoding; reject odd ones. */
   if ((Q == 1) && ((dreg & 1) == 1))
      return False;
   if (Q)
      dreg >>= 1;
   arg_m = newTemp(Ity_I64);
   assign(arg_m, getDRegI64(mreg));
   if (Q)
      res = newTemp(Ity_V128);
   else
      res = newTemp(Ity_I64);
   /* The lowest set bit of imm4 selects the element size; the bits
      above it give the element index within the source D register. */
   if ((imm4 & 1) == 1) {
      op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
      op2 = Iop_GetElem8x8;
      index = imm4 >> 1;
      size = 8;
   } else if ((imm4 & 3) == 2) {
      op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
      op2 = Iop_GetElem16x4;
      index = imm4 >> 2;
      size = 16;
   } else if ((imm4 & 7) == 4) {
      op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
      op2 = Iop_GetElem32x2;
      index = imm4 >> 3;
      size = 32;
   } else {
      return False; // can this ever happen?
   /* Fetch the selected element, replicate it across all lanes. */
   assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
   if (Q) {
      putQReg(dreg, mkexpr(res), condT);
   } else {
      putDRegI64(dreg, mkexpr(res), condT);
   DIP("vdup.%u %c%u, d%u[%u]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
   return True;
3065 /* A7.4.1 Three registers of the same length */
3066 static
3067 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
3069 /* In paths where this returns False, indicating a non-decodable
3070 instruction, there may still be some IR assignments to temporaries
3071 generated. This is inconvenient but harmless, and the post-front-end
3072 IR optimisation pass will just remove them anyway. So there's no
3073 effort made here to tidy it up.
3075 UInt Q = (theInstr >> 6) & 1;
3076 UInt dreg = get_neon_d_regno(theInstr);
3077 UInt nreg = get_neon_n_regno(theInstr);
3078 UInt mreg = get_neon_m_regno(theInstr);
3079 UInt A = (theInstr >> 8) & 0xF;
3080 UInt B = (theInstr >> 4) & 1;
3081 UInt C = (theInstr >> 20) & 0x3;
3082 UInt U = (theInstr >> 24) & 1;
3083 UInt size = C;
3085 IRTemp arg_n;
3086 IRTemp arg_m;
3087 IRTemp res;
3089 if (Q) {
3090 arg_n = newTemp(Ity_V128);
3091 arg_m = newTemp(Ity_V128);
3092 res = newTemp(Ity_V128);
3093 assign(arg_n, getQReg(nreg));
3094 assign(arg_m, getQReg(mreg));
3095 } else {
3096 arg_n = newTemp(Ity_I64);
3097 arg_m = newTemp(Ity_I64);
3098 res = newTemp(Ity_I64);
3099 assign(arg_n, getDRegI64(nreg));
3100 assign(arg_m, getDRegI64(mreg));
3103 switch(A) {
3104 case 0:
3105 if (B == 0) {
3106 /* VHADD */
3107 ULong imm = 0;
3108 IRExpr *imm_val;
3109 IROp addOp;
3110 IROp andOp;
3111 IROp shOp;
3112 HChar regType = Q ? 'q' : 'd';
3114 if (size == 3)
3115 return False;
3116 switch(size) {
3117 case 0: imm = 0x101010101010101LL; break;
3118 case 1: imm = 0x1000100010001LL; break;
3119 case 2: imm = 0x100000001LL; break;
3120 default: vassert(0);
3122 if (Q) {
3123 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3124 andOp = Iop_AndV128;
3125 } else {
3126 imm_val = mkU64(imm);
3127 andOp = Iop_And64;
3129 if (U) {
3130 switch(size) {
3131 case 0:
3132 addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3133 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3134 break;
3135 case 1:
3136 addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3137 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3138 break;
3139 case 2:
3140 addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3141 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3142 break;
3143 default:
3144 vassert(0);
3146 } else {
3147 switch(size) {
3148 case 0:
3149 addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3150 shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3151 break;
3152 case 1:
3153 addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3154 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3155 break;
3156 case 2:
3157 addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3158 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3159 break;
3160 default:
3161 vassert(0);
3164 assign(res,
3165 binop(addOp,
3166 binop(addOp,
3167 binop(shOp, mkexpr(arg_m), mkU8(1)),
3168 binop(shOp, mkexpr(arg_n), mkU8(1))),
3169 binop(shOp,
3170 binop(addOp,
3171 binop(andOp, mkexpr(arg_m), imm_val),
3172 binop(andOp, mkexpr(arg_n), imm_val)),
3173 mkU8(1))));
3174 DIP("vhadd.%c%d %c%u, %c%u, %c%u\n",
3175 U ? 'u' : 's', 8 << size, regType,
3176 dreg, regType, nreg, regType, mreg);
3177 } else {
3178 /* VQADD */
3179 IROp op, op2;
3180 IRTemp tmp;
3181 HChar reg_t = Q ? 'q' : 'd';
3182 if (Q) {
3183 switch (size) {
3184 case 0:
3185 op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
3186 op2 = Iop_Add8x16;
3187 break;
3188 case 1:
3189 op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
3190 op2 = Iop_Add16x8;
3191 break;
3192 case 2:
3193 op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
3194 op2 = Iop_Add32x4;
3195 break;
3196 case 3:
3197 op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
3198 op2 = Iop_Add64x2;
3199 break;
3200 default:
3201 vassert(0);
3203 } else {
3204 switch (size) {
3205 case 0:
3206 op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
3207 op2 = Iop_Add8x8;
3208 break;
3209 case 1:
3210 op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
3211 op2 = Iop_Add16x4;
3212 break;
3213 case 2:
3214 op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
3215 op2 = Iop_Add32x2;
3216 break;
3217 case 3:
3218 op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
3219 op2 = Iop_Add64;
3220 break;
3221 default:
3222 vassert(0);
3225 if (Q) {
3226 tmp = newTemp(Ity_V128);
3227 } else {
3228 tmp = newTemp(Ity_I64);
3230 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3231 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3232 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3233 DIP("vqadd.%c%d %c%u %c%u, %c%u\n",
3234 U ? 'u' : 's',
3235 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3237 break;
3238 case 1:
3239 if (B == 0) {
3240 /* VRHADD */
3241 /* VRHADD C, A, B ::=
3242 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
3243 IROp shift_op, add_op;
3244 IRTemp cc;
3245 ULong one = 1;
3246 HChar reg_t = Q ? 'q' : 'd';
3247 switch (size) {
3248 case 0: one = (one << 8) | one; /* fall through */
3249 case 1: one = (one << 16) | one; /* fall through */
3250 case 2: one = (one << 32) | one; break;
3251 case 3: return False;
3252 default: vassert(0);
3254 if (Q) {
3255 switch (size) {
3256 case 0:
3257 shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
3258 add_op = Iop_Add8x16;
3259 break;
3260 case 1:
3261 shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
3262 add_op = Iop_Add16x8;
3263 break;
3264 case 2:
3265 shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
3266 add_op = Iop_Add32x4;
3267 break;
3268 case 3:
3269 return False;
3270 default:
3271 vassert(0);
3273 } else {
3274 switch (size) {
3275 case 0:
3276 shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
3277 add_op = Iop_Add8x8;
3278 break;
3279 case 1:
3280 shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
3281 add_op = Iop_Add16x4;
3282 break;
3283 case 2:
3284 shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
3285 add_op = Iop_Add32x2;
3286 break;
3287 case 3:
3288 return False;
3289 default:
3290 vassert(0);
3293 if (Q) {
3294 cc = newTemp(Ity_V128);
3295 assign(cc, binop(shift_op,
3296 binop(add_op,
3297 binop(add_op,
3298 binop(Iop_AndV128,
3299 mkexpr(arg_n),
3300 binop(Iop_64HLtoV128,
3301 mkU64(one),
3302 mkU64(one))),
3303 binop(Iop_AndV128,
3304 mkexpr(arg_m),
3305 binop(Iop_64HLtoV128,
3306 mkU64(one),
3307 mkU64(one)))),
3308 binop(Iop_64HLtoV128,
3309 mkU64(one),
3310 mkU64(one))),
3311 mkU8(1)));
3312 assign(res, binop(add_op,
3313 binop(add_op,
3314 binop(shift_op,
3315 mkexpr(arg_n),
3316 mkU8(1)),
3317 binop(shift_op,
3318 mkexpr(arg_m),
3319 mkU8(1))),
3320 mkexpr(cc)));
3321 } else {
3322 cc = newTemp(Ity_I64);
3323 assign(cc, binop(shift_op,
3324 binop(add_op,
3325 binop(add_op,
3326 binop(Iop_And64,
3327 mkexpr(arg_n),
3328 mkU64(one)),
3329 binop(Iop_And64,
3330 mkexpr(arg_m),
3331 mkU64(one))),
3332 mkU64(one)),
3333 mkU8(1)));
3334 assign(res, binop(add_op,
3335 binop(add_op,
3336 binop(shift_op,
3337 mkexpr(arg_n),
3338 mkU8(1)),
3339 binop(shift_op,
3340 mkexpr(arg_m),
3341 mkU8(1))),
3342 mkexpr(cc)));
3344 DIP("vrhadd.%c%d %c%u, %c%u, %c%u\n",
3345 U ? 'u' : 's',
3346 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3347 } else {
3348 if (U == 0) {
3349 switch(C) {
3350 case 0: {
3351 /* VAND */
3352 HChar reg_t = Q ? 'q' : 'd';
3353 if (Q) {
3354 assign(res, binop(Iop_AndV128, mkexpr(arg_n),
3355 mkexpr(arg_m)));
3356 } else {
3357 assign(res, binop(Iop_And64, mkexpr(arg_n),
3358 mkexpr(arg_m)));
3360 DIP("vand %c%u, %c%u, %c%u\n",
3361 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3362 break;
3364 case 1: {
3365 /* VBIC */
3366 HChar reg_t = Q ? 'q' : 'd';
3367 if (Q) {
3368 assign(res, binop(Iop_AndV128,mkexpr(arg_n),
3369 unop(Iop_NotV128, mkexpr(arg_m))));
3370 } else {
3371 assign(res, binop(Iop_And64, mkexpr(arg_n),
3372 unop(Iop_Not64, mkexpr(arg_m))));
3374 DIP("vbic %c%u, %c%u, %c%u\n",
3375 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3376 break;
3378 case 2:
3379 if ( nreg != mreg) {
3380 /* VORR */
3381 HChar reg_t = Q ? 'q' : 'd';
3382 if (Q) {
3383 assign(res, binop(Iop_OrV128, mkexpr(arg_n),
3384 mkexpr(arg_m)));
3385 } else {
3386 assign(res, binop(Iop_Or64, mkexpr(arg_n),
3387 mkexpr(arg_m)));
3389 DIP("vorr %c%u, %c%u, %c%u\n",
3390 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3391 } else {
3392 /* VMOV */
3393 HChar reg_t = Q ? 'q' : 'd';
3394 assign(res, mkexpr(arg_m));
3395 DIP("vmov %c%u, %c%u\n", reg_t, dreg, reg_t, mreg);
3397 break;
3398 case 3:{
3399 /* VORN */
3400 HChar reg_t = Q ? 'q' : 'd';
3401 if (Q) {
3402 assign(res, binop(Iop_OrV128,mkexpr(arg_n),
3403 unop(Iop_NotV128, mkexpr(arg_m))));
3404 } else {
3405 assign(res, binop(Iop_Or64, mkexpr(arg_n),
3406 unop(Iop_Not64, mkexpr(arg_m))));
3408 DIP("vorn %c%u, %c%u, %c%u\n",
3409 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3410 break;
3412 default:
3413 vassert(0);
3415 } else {
3416 switch(C) {
3417 case 0:
3418 /* VEOR (XOR) */
3419 if (Q) {
3420 assign(res, binop(Iop_XorV128, mkexpr(arg_n),
3421 mkexpr(arg_m)));
3422 } else {
3423 assign(res, binop(Iop_Xor64, mkexpr(arg_n),
3424 mkexpr(arg_m)));
3426 DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
3427 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3428 break;
3429 case 1:
3430 /* VBSL */
3431 if (Q) {
3432 IRTemp reg_d = newTemp(Ity_V128);
3433 assign(reg_d, getQReg(dreg));
3434 assign(res,
3435 binop(Iop_OrV128,
3436 binop(Iop_AndV128, mkexpr(arg_n),
3437 mkexpr(reg_d)),
3438 binop(Iop_AndV128,
3439 mkexpr(arg_m),
3440 unop(Iop_NotV128,
3441 mkexpr(reg_d)) ) ) );
3442 } else {
3443 IRTemp reg_d = newTemp(Ity_I64);
3444 assign(reg_d, getDRegI64(dreg));
3445 assign(res,
3446 binop(Iop_Or64,
3447 binop(Iop_And64, mkexpr(arg_n),
3448 mkexpr(reg_d)),
3449 binop(Iop_And64,
3450 mkexpr(arg_m),
3451 unop(Iop_Not64, mkexpr(reg_d)))));
3453 DIP("vbsl %c%u, %c%u, %c%u\n",
3454 Q ? 'q' : 'd', dreg,
3455 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3456 break;
3457 case 2:
3458 /* VBIT */
3459 if (Q) {
3460 IRTemp reg_d = newTemp(Ity_V128);
3461 assign(reg_d, getQReg(dreg));
3462 assign(res,
3463 binop(Iop_OrV128,
3464 binop(Iop_AndV128, mkexpr(arg_n),
3465 mkexpr(arg_m)),
3466 binop(Iop_AndV128,
3467 mkexpr(reg_d),
3468 unop(Iop_NotV128, mkexpr(arg_m)))));
3469 } else {
3470 IRTemp reg_d = newTemp(Ity_I64);
3471 assign(reg_d, getDRegI64(dreg));
3472 assign(res,
3473 binop(Iop_Or64,
3474 binop(Iop_And64, mkexpr(arg_n),
3475 mkexpr(arg_m)),
3476 binop(Iop_And64,
3477 mkexpr(reg_d),
3478 unop(Iop_Not64, mkexpr(arg_m)))));
3480 DIP("vbit %c%u, %c%u, %c%u\n",
3481 Q ? 'q' : 'd', dreg,
3482 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3483 break;
3484 case 3:
3485 /* VBIF */
3486 if (Q) {
3487 IRTemp reg_d = newTemp(Ity_V128);
3488 assign(reg_d, getQReg(dreg));
3489 assign(res,
3490 binop(Iop_OrV128,
3491 binop(Iop_AndV128, mkexpr(reg_d),
3492 mkexpr(arg_m)),
3493 binop(Iop_AndV128,
3494 mkexpr(arg_n),
3495 unop(Iop_NotV128, mkexpr(arg_m)))));
3496 } else {
3497 IRTemp reg_d = newTemp(Ity_I64);
3498 assign(reg_d, getDRegI64(dreg));
3499 assign(res,
3500 binop(Iop_Or64,
3501 binop(Iop_And64, mkexpr(reg_d),
3502 mkexpr(arg_m)),
3503 binop(Iop_And64,
3504 mkexpr(arg_n),
3505 unop(Iop_Not64, mkexpr(arg_m)))));
3507 DIP("vbif %c%u, %c%u, %c%u\n",
3508 Q ? 'q' : 'd', dreg,
3509 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3510 break;
3511 default:
3512 vassert(0);
3516 break;
3517 case 2:
3518 if (B == 0) {
3519 /* VHSUB */
3520 /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1) */
3521 ULong imm = 0;
3522 IRExpr *imm_val;
3523 IROp subOp;
3524 IROp notOp;
3525 IROp andOp;
3526 IROp shOp;
3527 if (size == 3)
3528 return False;
3529 switch(size) {
3530 case 0: imm = 0x101010101010101LL; break;
3531 case 1: imm = 0x1000100010001LL; break;
3532 case 2: imm = 0x100000001LL; break;
3533 default: vassert(0);
3535 if (Q) {
3536 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3537 andOp = Iop_AndV128;
3538 notOp = Iop_NotV128;
3539 } else {
3540 imm_val = mkU64(imm);
3541 andOp = Iop_And64;
3542 notOp = Iop_Not64;
3544 if (U) {
3545 switch(size) {
3546 case 0:
3547 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3548 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3549 break;
3550 case 1:
3551 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3552 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3553 break;
3554 case 2:
3555 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3556 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3557 break;
3558 default:
3559 vassert(0);
3561 } else {
3562 switch(size) {
3563 case 0:
3564 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3565 shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3566 break;
3567 case 1:
3568 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3569 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3570 break;
3571 case 2:
3572 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3573 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3574 break;
3575 default:
3576 vassert(0);
3579 assign(res,
3580 binop(subOp,
3581 binop(subOp,
3582 binop(shOp, mkexpr(arg_n), mkU8(1)),
3583 binop(shOp, mkexpr(arg_m), mkU8(1))),
3584 binop(andOp,
3585 binop(andOp,
3586 unop(notOp, mkexpr(arg_n)),
3587 mkexpr(arg_m)),
3588 imm_val)));
3589 DIP("vhsub.%c%d %c%u, %c%u, %c%u\n",
3590 U ? 'u' : 's', 8 << size,
3591 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3592 mreg);
3593 } else {
3594 /* VQSUB */
3595 IROp op, op2;
3596 IRTemp tmp;
3597 if (Q) {
3598 switch (size) {
3599 case 0:
3600 op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
3601 op2 = Iop_Sub8x16;
3602 break;
3603 case 1:
3604 op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
3605 op2 = Iop_Sub16x8;
3606 break;
3607 case 2:
3608 op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
3609 op2 = Iop_Sub32x4;
3610 break;
3611 case 3:
3612 op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
3613 op2 = Iop_Sub64x2;
3614 break;
3615 default:
3616 vassert(0);
3618 } else {
3619 switch (size) {
3620 case 0:
3621 op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
3622 op2 = Iop_Sub8x8;
3623 break;
3624 case 1:
3625 op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
3626 op2 = Iop_Sub16x4;
3627 break;
3628 case 2:
3629 op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
3630 op2 = Iop_Sub32x2;
3631 break;
3632 case 3:
3633 op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
3634 op2 = Iop_Sub64;
3635 break;
3636 default:
3637 vassert(0);
3640 if (Q)
3641 tmp = newTemp(Ity_V128);
3642 else
3643 tmp = newTemp(Ity_I64);
3644 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3645 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3646 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3647 DIP("vqsub.%c%d %c%u, %c%u, %c%u\n",
3648 U ? 'u' : 's', 8 << size,
3649 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3650 mreg);
3652 break;
3653 case 3: {
3654 IROp op;
3655 if (Q) {
3656 switch (size) {
3657 case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
3658 case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
3659 case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
3660 case 3: return False;
3661 default: vassert(0);
3663 } else {
3664 switch (size) {
3665 case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
3666 case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
3667 case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
3668 case 3: return False;
3669 default: vassert(0);
3672 if (B == 0) {
3673 /* VCGT */
3674 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3675 DIP("vcgt.%c%d %c%u, %c%u, %c%u\n",
3676 U ? 'u' : 's', 8 << size,
3677 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3678 mreg);
3679 } else {
3680 /* VCGE */
3681 /* VCGE res, argn, argm
3682 is equal to
3683 VCGT tmp, argm, argn
3684 VNOT res, tmp */
3685 assign(res,
3686 unop(Q ? Iop_NotV128 : Iop_Not64,
3687 binop(op, mkexpr(arg_m), mkexpr(arg_n))));
3688 DIP("vcge.%c%d %c%u, %c%u, %c%u\n",
3689 U ? 'u' : 's', 8 << size,
3690 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3691 mreg);
3694 break;
3695 case 4:
3696 if (B == 0) {
3697 /* VSHL */
3698 IROp op = Iop_INVALID, sub_op = Iop_INVALID;
3699 IRTemp tmp = IRTemp_INVALID;
3700 if (U) {
3701 switch (size) {
3702 case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
3703 case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
3704 case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
3705 case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
3706 default: vassert(0);
3708 } else {
3709 tmp = newTemp(Q ? Ity_V128 : Ity_I64);
3710 switch (size) {
3711 case 0:
3712 op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3713 sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3714 break;
3715 case 1:
3716 op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3717 sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3718 break;
3719 case 2:
3720 op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3721 sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3722 break;
3723 case 3:
3724 op = Q ? Iop_Sar64x2 : Iop_Sar64;
3725 sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
3726 break;
3727 default:
3728 vassert(0);
3731 if (U) {
3732 if (!Q && (size == 3))
3733 assign(res, binop(op, mkexpr(arg_m),
3734 unop(Iop_64to8, mkexpr(arg_n))));
3735 else
3736 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3737 } else {
3738 if (Q)
3739 assign(tmp, binop(sub_op,
3740 binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
3741 mkexpr(arg_n)));
3742 else
3743 assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
3744 if (!Q && (size == 3))
3745 assign(res, binop(op, mkexpr(arg_m),
3746 unop(Iop_64to8, mkexpr(tmp))));
3747 else
3748 assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
3750 DIP("vshl.%c%d %c%u, %c%u, %c%u\n",
3751 U ? 'u' : 's', 8 << size,
3752 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3753 nreg);
3754 } else {
3755 /* VQSHL */
3756 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
3757 IRTemp tmp, shval, mask, old_shval;
3758 UInt i;
3759 ULong esize;
3760 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3761 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3762 if (U) {
3763 switch (size) {
3764 case 0:
3765 op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3766 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3767 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3768 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3769 break;
3770 case 1:
3771 op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3772 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3773 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3774 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3775 break;
3776 case 2:
3777 op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3778 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3779 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3780 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3781 break;
3782 case 3:
3783 op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3784 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3785 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3786 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3787 break;
3788 default:
3789 vassert(0);
3791 } else {
3792 switch (size) {
3793 case 0:
3794 op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
3795 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3796 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3797 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3798 break;
3799 case 1:
3800 op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
3801 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3802 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3803 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3804 break;
3805 case 2:
3806 op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
3807 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3808 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3809 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3810 break;
3811 case 3:
3812 op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
3813 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
3814 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3815 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3816 break;
3817 default:
3818 vassert(0);
3821 if (Q) {
3822 tmp = newTemp(Ity_V128);
3823 shval = newTemp(Ity_V128);
3824 mask = newTemp(Ity_V128);
3825 } else {
3826 tmp = newTemp(Ity_I64);
3827 shval = newTemp(Ity_I64);
3828 mask = newTemp(Ity_I64);
3830 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3831 /* Only least significant byte from second argument is used.
3832 Copy this byte to the whole vector element. */
3833 assign(shval, binop(op_shrn,
3834 binop(op_shln,
3835 mkexpr(arg_n),
3836 mkU8((8 << size) - 8)),
3837 mkU8((8 << size) - 8)));
3838 for(i = 0; i < size; i++) {
3839 old_shval = shval;
3840 shval = newTemp(Q ? Ity_V128 : Ity_I64);
3841 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3842 mkexpr(old_shval),
3843 binop(op_shln,
3844 mkexpr(old_shval),
3845 mkU8(8 << i))));
3847 /* If shift is greater or equal to the element size and
3848 element is non-zero, then QC flag should be set. */
3849 esize = (8 << size) - 1;
3850 esize = (esize << 8) | esize;
3851 esize = (esize << 16) | esize;
3852 esize = (esize << 32) | esize;
3853 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3854 binop(cmp_gt, mkexpr(shval),
3855 Q ? mkU128(esize) : mkU64(esize)),
3856 unop(cmp_neq, mkexpr(arg_m))),
3857 Q ? mkU128(0) : mkU64(0),
3858 Q, condT);
 3859          /* Otherwise QC flag should be set if shift value is positive and
 3860             the result, right-shifted by the same value, is not equal to the
 3861             left argument. */
3862 assign(mask, binop(cmp_gt, mkexpr(shval),
3863 Q ? mkU128(0) : mkU64(0)));
3864 if (!Q && size == 3)
3865 assign(tmp, binop(op_rev, mkexpr(res),
3866 unop(Iop_64to8, mkexpr(arg_n))));
3867 else
3868 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3869 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3870 mkexpr(tmp), mkexpr(mask)),
3871 binop(Q ? Iop_AndV128 : Iop_And64,
3872 mkexpr(arg_m), mkexpr(mask)),
3873 Q, condT);
3874 DIP("vqshl.%c%d %c%u, %c%u, %c%u\n",
3875 U ? 'u' : 's', 8 << size,
3876 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3877 nreg);
3879 break;
3880 case 5:
3881 if (B == 0) {
3882 /* VRSHL */
3883 IROp op, op_shrn, op_shln, cmp_gt, op_add;
3884 IRTemp shval, old_shval, imm_val, round;
3885 UInt i;
3886 ULong imm;
3887 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3888 imm = 1L;
3889 switch (size) {
3890 case 0: imm = (imm << 8) | imm; /* fall through */
3891 case 1: imm = (imm << 16) | imm; /* fall through */
3892 case 2: imm = (imm << 32) | imm; /* fall through */
3893 case 3: break;
3894 default: vassert(0);
3896 imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3897 round = newTemp(Q ? Ity_V128 : Ity_I64);
3898 assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3899 if (U) {
3900 switch (size) {
3901 case 0:
3902 op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3903 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3904 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3905 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3906 break;
3907 case 1:
3908 op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3909 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3910 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3911 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3912 break;
3913 case 2:
3914 op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3915 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3916 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3917 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3918 break;
3919 case 3:
3920 op = Q ? Iop_Shl64x2 : Iop_Shl64;
3921 op_add = Q ? Iop_Add64x2 : Iop_Add64;
3922 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3923 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3924 break;
3925 default:
3926 vassert(0);
3928 } else {
3929 switch (size) {
3930 case 0:
3931 op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3932 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3933 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3934 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3935 break;
3936 case 1:
3937 op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3938 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3939 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3940 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3941 break;
3942 case 2:
3943 op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3944 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3945 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3946 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3947 break;
3948 case 3:
3949 op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3950 op_add = Q ? Iop_Add64x2 : Iop_Add64;
3951 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3952 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3953 break;
3954 default:
3955 vassert(0);
3958 if (Q) {
3959 shval = newTemp(Ity_V128);
3960 } else {
3961 shval = newTemp(Ity_I64);
3963 /* Only least significant byte from second argument is used.
3964 Copy this byte to the whole vector element. */
3965 assign(shval, binop(op_shrn,
3966 binop(op_shln,
3967 mkexpr(arg_n),
3968 mkU8((8 << size) - 8)),
3969 mkU8((8 << size) - 8)));
3970 for (i = 0; i < size; i++) {
3971 old_shval = shval;
3972 shval = newTemp(Q ? Ity_V128 : Ity_I64);
3973 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3974 mkexpr(old_shval),
3975 binop(op_shln,
3976 mkexpr(old_shval),
3977 mkU8(8 << i))));
3979 /* Compute the result */
3980 if (!Q && size == 3 && U) {
3981 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3982 binop(op,
3983 mkexpr(arg_m),
3984 unop(Iop_64to8,
3985 binop(op_add,
3986 mkexpr(arg_n),
3987 mkexpr(imm_val)))),
3988 binop(Q ? Iop_AndV128 : Iop_And64,
3989 mkexpr(imm_val),
3990 binop(cmp_gt,
3991 Q ? mkU128(0) : mkU64(0),
3992 mkexpr(arg_n)))));
3993 assign(res, binop(op_add,
3994 binop(op,
3995 mkexpr(arg_m),
3996 unop(Iop_64to8, mkexpr(arg_n))),
3997 mkexpr(round)));
3998 } else {
3999 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4000 binop(op,
4001 mkexpr(arg_m),
4002 binop(op_add,
4003 mkexpr(arg_n),
4004 mkexpr(imm_val))),
4005 binop(Q ? Iop_AndV128 : Iop_And64,
4006 mkexpr(imm_val),
4007 binop(cmp_gt,
4008 Q ? mkU128(0) : mkU64(0),
4009 mkexpr(arg_n)))));
4010 assign(res, binop(op_add,
4011 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4012 mkexpr(round)));
4014 DIP("vrshl.%c%d %c%u, %c%u, %c%u\n",
4015 U ? 'u' : 's', 8 << size,
4016 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4017 nreg);
4018 } else {
4019 /* VQRSHL */
4020 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
4021 IRTemp tmp, shval, mask, old_shval, imm_val, round;
4022 UInt i;
4023 ULong esize, imm;
4024 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
4025 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
4026 imm = 1L;
4027 switch (size) {
4028 case 0: imm = (imm << 8) | imm; /* fall through */
4029 case 1: imm = (imm << 16) | imm; /* fall through */
4030 case 2: imm = (imm << 32) | imm; /* fall through */
4031 case 3: break;
4032 default: vassert(0);
4034 imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
4035 round = newTemp(Q ? Ity_V128 : Ity_I64);
4036 assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
4037 if (U) {
4038 switch (size) {
4039 case 0:
4040 op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
4041 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4042 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
4043 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4044 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4045 break;
4046 case 1:
4047 op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
4048 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4049 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
4050 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4051 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4052 break;
4053 case 2:
4054 op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
4055 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4056 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
4057 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4058 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4059 break;
4060 case 3:
4061 op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
4062 op_add = Q ? Iop_Add64x2 : Iop_Add64;
4063 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
4064 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4065 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4066 break;
4067 default:
4068 vassert(0);
4070 } else {
4071 switch (size) {
4072 case 0:
4073 op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
4074 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4075 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
4076 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4077 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4078 break;
4079 case 1:
4080 op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
4081 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4082 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
4083 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4084 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4085 break;
4086 case 2:
4087 op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
4088 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4089 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
4090 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4091 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4092 break;
4093 case 3:
4094 op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
4095 op_add = Q ? Iop_Add64x2 : Iop_Add64;
4096 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
4097 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4098 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4099 break;
4100 default:
4101 vassert(0);
4104 if (Q) {
4105 tmp = newTemp(Ity_V128);
4106 shval = newTemp(Ity_V128);
4107 mask = newTemp(Ity_V128);
4108 } else {
4109 tmp = newTemp(Ity_I64);
4110 shval = newTemp(Ity_I64);
4111 mask = newTemp(Ity_I64);
4113 /* Only least significant byte from second argument is used.
4114 Copy this byte to the whole vector element. */
4115 assign(shval, binop(op_shrn,
4116 binop(op_shln,
4117 mkexpr(arg_n),
4118 mkU8((8 << size) - 8)),
4119 mkU8((8 << size) - 8)));
4120 for (i = 0; i < size; i++) {
4121 old_shval = shval;
4122 shval = newTemp(Q ? Ity_V128 : Ity_I64);
4123 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
4124 mkexpr(old_shval),
4125 binop(op_shln,
4126 mkexpr(old_shval),
4127 mkU8(8 << i))));
4129 /* Compute the result */
4130 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4131 binop(op,
4132 mkexpr(arg_m),
4133 binop(op_add,
4134 mkexpr(arg_n),
4135 mkexpr(imm_val))),
4136 binop(Q ? Iop_AndV128 : Iop_And64,
4137 mkexpr(imm_val),
4138 binop(cmp_gt,
4139 Q ? mkU128(0) : mkU64(0),
4140 mkexpr(arg_n)))));
4141 assign(res, binop(op_add,
4142 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4143 mkexpr(round)));
4144 /* If shift is greater or equal to the element size and element is
4145 non-zero, then QC flag should be set. */
4146 esize = (8 << size) - 1;
4147 esize = (esize << 8) | esize;
4148 esize = (esize << 16) | esize;
4149 esize = (esize << 32) | esize;
4150 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4151 binop(cmp_gt, mkexpr(shval),
4152 Q ? mkU128(esize) : mkU64(esize)),
4153 unop(cmp_neq, mkexpr(arg_m))),
4154 Q ? mkU128(0) : mkU64(0),
4155 Q, condT);
 4156          /* Otherwise QC flag should be set if shift value is positive and
 4157             the result, right-shifted by the same value, is not equal to the
 4158             left argument. */
4159 assign(mask, binop(cmp_gt, mkexpr(shval),
4160 Q ? mkU128(0) : mkU64(0)));
4161 if (!Q && size == 3)
4162 assign(tmp, binop(op_rev, mkexpr(res),
4163 unop(Iop_64to8, mkexpr(arg_n))));
4164 else
4165 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
4166 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4167 mkexpr(tmp), mkexpr(mask)),
4168 binop(Q ? Iop_AndV128 : Iop_And64,
4169 mkexpr(arg_m), mkexpr(mask)),
4170 Q, condT);
4171 DIP("vqrshl.%c%d %c%u, %c%u, %c%u\n",
4172 U ? 'u' : 's', 8 << size,
4173 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4174 nreg);
4176 break;
4177 case 6:
4178 /* VMAX, VMIN */
4179 if (B == 0) {
4180 /* VMAX */
4181 IROp op;
4182 if (U == 0) {
4183 switch (size) {
4184 case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
4185 case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
4186 case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
4187 case 3: return False;
4188 default: vassert(0);
4190 } else {
4191 switch (size) {
4192 case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
4193 case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
4194 case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
4195 case 3: return False;
4196 default: vassert(0);
4199 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4200 DIP("vmax.%c%d %c%u, %c%u, %c%u\n",
4201 U ? 'u' : 's', 8 << size,
4202 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4203 mreg);
4204 } else {
4205 /* VMIN */
4206 IROp op;
4207 if (U == 0) {
4208 switch (size) {
4209 case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
4210 case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
4211 case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
4212 case 3: return False;
4213 default: vassert(0);
4215 } else {
4216 switch (size) {
4217 case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
4218 case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
4219 case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
4220 case 3: return False;
4221 default: vassert(0);
4224 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4225 DIP("vmin.%c%d %c%u, %c%u, %c%u\n",
4226 U ? 'u' : 's', 8 << size,
4227 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4228 mreg);
4230 break;
4231 case 7:
4232 if (B == 0) {
4233 /* VABD */
4234 IROp op_cmp, op_sub;
4235 IRTemp cond;
4236 if ((theInstr >> 23) & 1) {
4237 vpanic("VABDL should not be in dis_neon_data_3same\n");
4239 if (Q) {
4240 switch (size) {
4241 case 0:
4242 op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4243 op_sub = Iop_Sub8x16;
4244 break;
4245 case 1:
4246 op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4247 op_sub = Iop_Sub16x8;
4248 break;
4249 case 2:
4250 op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4251 op_sub = Iop_Sub32x4;
4252 break;
4253 case 3:
4254 return False;
4255 default:
4256 vassert(0);
4258 } else {
4259 switch (size) {
4260 case 0:
4261 op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4262 op_sub = Iop_Sub8x8;
4263 break;
4264 case 1:
4265 op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4266 op_sub = Iop_Sub16x4;
4267 break;
4268 case 2:
4269 op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4270 op_sub = Iop_Sub32x2;
4271 break;
4272 case 3:
4273 return False;
4274 default:
4275 vassert(0);
4278 if (Q) {
4279 cond = newTemp(Ity_V128);
4280 } else {
4281 cond = newTemp(Ity_I64);
4283 assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4284 assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
4285 binop(Q ? Iop_AndV128 : Iop_And64,
4286 binop(op_sub, mkexpr(arg_n),
4287 mkexpr(arg_m)),
4288 mkexpr(cond)),
4289 binop(Q ? Iop_AndV128 : Iop_And64,
4290 binop(op_sub, mkexpr(arg_m),
4291 mkexpr(arg_n)),
4292 unop(Q ? Iop_NotV128 : Iop_Not64,
4293 mkexpr(cond)))));
4294 DIP("vabd.%c%d %c%u, %c%u, %c%u\n",
4295 U ? 'u' : 's', 8 << size,
4296 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4297 mreg);
4298 } else {
4299 /* VABA */
4300 IROp op_cmp, op_sub, op_add;
4301 IRTemp cond, acc, tmp;
4302 if ((theInstr >> 23) & 1) {
4303 vpanic("VABAL should not be in dis_neon_data_3same");
4305 if (Q) {
4306 switch (size) {
4307 case 0:
4308 op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4309 op_sub = Iop_Sub8x16;
4310 op_add = Iop_Add8x16;
4311 break;
4312 case 1:
4313 op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4314 op_sub = Iop_Sub16x8;
4315 op_add = Iop_Add16x8;
4316 break;
4317 case 2:
4318 op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4319 op_sub = Iop_Sub32x4;
4320 op_add = Iop_Add32x4;
4321 break;
4322 case 3:
4323 return False;
4324 default:
4325 vassert(0);
4327 } else {
4328 switch (size) {
4329 case 0:
4330 op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4331 op_sub = Iop_Sub8x8;
4332 op_add = Iop_Add8x8;
4333 break;
4334 case 1:
4335 op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4336 op_sub = Iop_Sub16x4;
4337 op_add = Iop_Add16x4;
4338 break;
4339 case 2:
4340 op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4341 op_sub = Iop_Sub32x2;
4342 op_add = Iop_Add32x2;
4343 break;
4344 case 3:
4345 return False;
4346 default:
4347 vassert(0);
4350 if (Q) {
4351 cond = newTemp(Ity_V128);
4352 acc = newTemp(Ity_V128);
4353 tmp = newTemp(Ity_V128);
4354 assign(acc, getQReg(dreg));
4355 } else {
4356 cond = newTemp(Ity_I64);
4357 acc = newTemp(Ity_I64);
4358 tmp = newTemp(Ity_I64);
4359 assign(acc, getDRegI64(dreg));
4361 assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4362 assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
4363 binop(Q ? Iop_AndV128 : Iop_And64,
4364 binop(op_sub, mkexpr(arg_n),
4365 mkexpr(arg_m)),
4366 mkexpr(cond)),
4367 binop(Q ? Iop_AndV128 : Iop_And64,
4368 binop(op_sub, mkexpr(arg_m),
4369 mkexpr(arg_n)),
4370 unop(Q ? Iop_NotV128 : Iop_Not64,
4371 mkexpr(cond)))));
4372 assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
4373 DIP("vaba.%c%d %c%u, %c%u, %c%u\n",
4374 U ? 'u' : 's', 8 << size,
4375 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4376 mreg);
4378 break;
4379 case 8:
4380 if (B == 0) {
4381 IROp op;
4382 if (U == 0) {
4383 /* VADD */
4384 switch (size) {
4385 case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
4386 case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
4387 case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
4388 case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
4389 default: vassert(0);
4391 DIP("vadd.i%d %c%u, %c%u, %c%u\n",
4392 8 << size, Q ? 'q' : 'd',
4393 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4394 } else {
4395 /* VSUB */
4396 switch (size) {
4397 case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
4398 case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
4399 case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
4400 case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
4401 default: vassert(0);
4403 DIP("vsub.i%d %c%u, %c%u, %c%u\n",
4404 8 << size, Q ? 'q' : 'd',
4405 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4407 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4408 } else {
4409 IROp op;
4410 switch (size) {
4411 case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
4412 case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
4413 case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
4414 case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
4415 default: vassert(0);
4417 if (U == 0) {
4418 /* VTST */
4419 assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
4420 mkexpr(arg_n),
4421 mkexpr(arg_m))));
4422 DIP("vtst.%d %c%u, %c%u, %c%u\n",
4423 8 << size, Q ? 'q' : 'd',
4424 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4425 } else {
4426 /* VCEQ */
4427 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
4428 unop(op,
4429 binop(Q ? Iop_XorV128 : Iop_Xor64,
4430 mkexpr(arg_n),
4431 mkexpr(arg_m)))));
4432 DIP("vceq.i%d %c%u, %c%u, %c%u\n",
4433 8 << size, Q ? 'q' : 'd',
4434 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4437 break;
4438 case 9:
4439 if (B == 0) {
4440 /* VMLA, VMLS (integer) */
4441 IROp op, op2;
4442 UInt P = (theInstr >> 24) & 1;
4443 if (P) {
4444 switch (size) {
4445 case 0:
4446 op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4447 op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
4448 break;
4449 case 1:
4450 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4451 op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
4452 break;
4453 case 2:
4454 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4455 op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
4456 break;
4457 case 3:
4458 return False;
4459 default:
4460 vassert(0);
4462 } else {
4463 switch (size) {
4464 case 0:
4465 op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4466 op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
4467 break;
4468 case 1:
4469 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4470 op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
4471 break;
4472 case 2:
4473 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4474 op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
4475 break;
4476 case 3:
4477 return False;
4478 default:
4479 vassert(0);
4482 assign(res, binop(op2,
4483 Q ? getQReg(dreg) : getDRegI64(dreg),
4484 binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4485 DIP("vml%c.i%d %c%u, %c%u, %c%u\n",
4486 P ? 's' : 'a', 8 << size,
4487 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4488 mreg);
4489 } else {
4490 /* VMUL */
4491 IROp op;
4492 UInt P = (theInstr >> 24) & 1;
4493 if (P) {
4494 switch (size) {
4495 case 0:
4496 op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
4497 break;
4498 case 1: case 2: case 3: return False;
4499 default: vassert(0);
4501 } else {
4502 switch (size) {
4503 case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
4504 case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
4505 case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
4506 case 3: return False;
4507 default: vassert(0);
4510 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4511 DIP("vmul.%c%d %c%u, %c%u, %c%u\n",
4512 P ? 'p' : 'i', 8 << size,
4513 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4514 mreg);
4516 break;
4517 case 10: {
4518 /* VPMAX, VPMIN */
4519 UInt P = (theInstr >> 4) & 1;
4520 IROp op;
4521 if (Q)
4522 return False;
4523 if (P) {
4524 switch (size) {
4525 case 0: op = U ? Iop_PwMin8Ux8 : Iop_PwMin8Sx8; break;
4526 case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
4527 case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
4528 case 3: return False;
4529 default: vassert(0);
4531 } else {
4532 switch (size) {
4533 case 0: op = U ? Iop_PwMax8Ux8 : Iop_PwMax8Sx8; break;
4534 case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
4535 case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
4536 case 3: return False;
4537 default: vassert(0);
4540 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4541 DIP("vp%s.%c%d %c%u, %c%u, %c%u\n",
4542 P ? "min" : "max", U ? 'u' : 's',
4543 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4544 Q ? 'q' : 'd', mreg);
4545 break;
4547 case 11:
4548 if (B == 0) {
4549 if (U == 0) {
4550 /* VQDMULH */
4551 IROp op ,op2;
4552 ULong imm;
4553 switch (size) {
4554 case 0: case 3:
4555 return False;
4556 case 1:
4557 op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
4558 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4559 imm = 1LL << 15;
4560 imm = (imm << 16) | imm;
4561 imm = (imm << 32) | imm;
4562 break;
4563 case 2:
4564 op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
4565 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4566 imm = 1LL << 31;
4567 imm = (imm << 32) | imm;
4568 break;
4569 default:
4570 vassert(0);
4572 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4573 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4574 binop(op2, mkexpr(arg_n),
4575 Q ? mkU128(imm) : mkU64(imm)),
4576 binop(op2, mkexpr(arg_m),
4577 Q ? mkU128(imm) : mkU64(imm))),
4578 Q ? mkU128(0) : mkU64(0),
4579 Q, condT);
4580 DIP("vqdmulh.s%d %c%u, %c%u, %c%u\n",
4581 8 << size, Q ? 'q' : 'd',
4582 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4583 } else {
4584 /* VQRDMULH */
4585 IROp op ,op2;
4586 ULong imm;
4587 switch(size) {
4588 case 0: case 3:
4589 return False;
4590 case 1:
4591 imm = 1LL << 15;
4592 imm = (imm << 16) | imm;
4593 imm = (imm << 32) | imm;
4594 op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
4595 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4596 break;
4597 case 2:
4598 imm = 1LL << 31;
4599 imm = (imm << 32) | imm;
4600 op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
4601 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4602 break;
4603 default:
4604 vassert(0);
4606 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4607 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4608 binop(op2, mkexpr(arg_n),
4609 Q ? mkU128(imm) : mkU64(imm)),
4610 binop(op2, mkexpr(arg_m),
4611 Q ? mkU128(imm) : mkU64(imm))),
4612 Q ? mkU128(0) : mkU64(0),
4613 Q, condT);
4614 DIP("vqrdmulh.s%d %c%u, %c%u, %c%u\n",
4615 8 << size, Q ? 'q' : 'd',
4616 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4618 } else {
4619 if (U == 0) {
4620 /* VPADD */
4621 IROp op;
4622 if (Q)
4623 return False;
4624 switch (size) {
4625 case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8; break;
4626 case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
4627 case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
4628 case 3: return False;
4629 default: vassert(0);
4631 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4632 DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
4633 8 << size, Q ? 'q' : 'd',
4634 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4635 } else {
4636 return False;
4639 break;
4640 case 12: {
4641 return False;
4643 /* Starting from here these are FP SIMD cases */
4644 case 13:
4645 if (B == 0) {
4646 IROp op;
4647 if (U == 0) {
4648 if ((C >> 1) == 0) {
4649 /* VADD */
4650 op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
4651 DIP("vadd.f32 %c%u, %c%u, %c%u\n",
4652 Q ? 'q' : 'd', dreg,
4653 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4654 } else {
4655 /* VSUB */
4656 op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
4657 DIP("vsub.f32 %c%u, %c%u, %c%u\n",
4658 Q ? 'q' : 'd', dreg,
4659 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4661 } else {
4662 if ((C >> 1) == 0) {
4663 /* VPADD */
4664 if (Q)
4665 return False;
4666 op = Iop_PwAdd32Fx2;
4667 DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4668 } else {
4669 /* VABD */
4670 if (Q) {
4671 assign(res, unop(Iop_Abs32Fx4,
4672 triop(Iop_Sub32Fx4,
4673 get_FAKE_roundingmode(),
4674 mkexpr(arg_n),
4675 mkexpr(arg_m))));
4676 } else {
4677 assign(res, unop(Iop_Abs32Fx2,
4678 binop(Iop_Sub32Fx2,
4679 mkexpr(arg_n),
4680 mkexpr(arg_m))));
4682 DIP("vabd.f32 %c%u, %c%u, %c%u\n",
4683 Q ? 'q' : 'd', dreg,
4684 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4685 break;
4688 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4689 } else {
4690 if (U == 0) {
4691 /* VMLA, VMLS */
4692 IROp op, op2;
4693 UInt P = (theInstr >> 21) & 1;
4694 if (P) {
4695 switch (size & 1) {
4696 case 0:
4697 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4698 op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
4699 break;
4700 case 1: return False;
4701 default: vassert(0);
4703 } else {
4704 switch (size & 1) {
4705 case 0:
4706 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4707 op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
4708 break;
4709 case 1: return False;
4710 default: vassert(0);
4713 assign(res, binop_w_fake_RM(
4714 op2,
4715 Q ? getQReg(dreg) : getDRegI64(dreg),
4716 binop_w_fake_RM(op, mkexpr(arg_n),
4717 mkexpr(arg_m))));
4719 DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
4720 P ? 's' : 'a', Q ? 'q' : 'd',
4721 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4722 } else {
4723 /* VMUL */
4724 IROp op;
4725 if ((C >> 1) != 0)
4726 return False;
4727 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
4728 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4729 DIP("vmul.f32 %c%u, %c%u, %c%u\n",
4730 Q ? 'q' : 'd', dreg,
4731 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4734 break;
4735 case 14:
4736 if (B == 0) {
4737 if (U == 0) {
4738 if ((C >> 1) == 0) {
4739 /* VCEQ */
4740 IROp op;
4741 if ((theInstr >> 20) & 1)
4742 return False;
4743 op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
4744 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4745 DIP("vceq.f32 %c%u, %c%u, %c%u\n",
4746 Q ? 'q' : 'd', dreg,
4747 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4748 } else {
4749 return False;
4751 } else {
4752 if ((C >> 1) == 0) {
4753 /* VCGE */
4754 IROp op;
4755 if ((theInstr >> 20) & 1)
4756 return False;
4757 op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4758 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4759 DIP("vcge.f32 %c%u, %c%u, %c%u\n",
4760 Q ? 'q' : 'd', dreg,
4761 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4762 } else {
4763 /* VCGT */
4764 IROp op;
4765 if ((theInstr >> 20) & 1)
4766 return False;
4767 op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4768 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4769 DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
4770 Q ? 'q' : 'd', dreg,
4771 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4774 } else {
4775 if (U == 1) {
4776 /* VACGE, VACGT */
4777 UInt op_bit = (theInstr >> 21) & 1;
4778 IROp op, op2;
4779 op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
4780 if (op_bit) {
4781 op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4782 assign(res, binop(op,
4783 unop(op2, mkexpr(arg_n)),
4784 unop(op2, mkexpr(arg_m))));
4785 } else {
4786 op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4787 assign(res, binop(op,
4788 unop(op2, mkexpr(arg_n)),
4789 unop(op2, mkexpr(arg_m))));
4791 DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
4792 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4793 Q ? 'q' : 'd', mreg);
4794 } else {
4795 return False;
4798 break;
4799 case 15:
4800 if (B == 0) {
4801 if (U == 0) {
4802 /* VMAX, VMIN */
4803 IROp op;
4804 if ((theInstr >> 20) & 1)
4805 return False;
4806 if ((theInstr >> 21) & 1) {
4807 op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
4808 DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4809 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4810 } else {
4811 op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
4812 DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4813 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4815 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4816 } else {
4817 /* VPMAX, VPMIN */
4818 IROp op;
4819 if (Q)
4820 return False;
4821 if ((theInstr >> 20) & 1)
4822 return False;
4823 if ((theInstr >> 21) & 1) {
4824 op = Iop_PwMin32Fx2;
4825 DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4826 } else {
4827 op = Iop_PwMax32Fx2;
4828 DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4830 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4832 } else {
4833 if (U == 0) {
4834 if ((C >> 1) == 0) {
4835 /* VRECPS */
4836 if ((theInstr >> 20) & 1)
4837 return False;
4838 assign(res, binop(Q ? Iop_RecipStep32Fx4
4839 : Iop_RecipStep32Fx2,
4840 mkexpr(arg_n),
4841 mkexpr(arg_m)));
4842 DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4843 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4844 } else {
4845 /* VRSQRTS */
4846 if ((theInstr >> 20) & 1)
4847 return False;
4848 assign(res, binop(Q ? Iop_RSqrtStep32Fx4
4849 : Iop_RSqrtStep32Fx2,
4850 mkexpr(arg_n),
4851 mkexpr(arg_m)));
4852 DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4853 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4855 } else {
4856 return False;
4859 break;
4860 default:
4861 /*NOTREACHED*/
4862 vassert(0);
4865 if (Q) {
4866 putQReg(dreg, mkexpr(res), condT);
4867 } else {
4868 putDRegI64(dreg, mkexpr(res), condT);
4871 return True;
4874 /* A7.4.2 Three registers of different length */
4875 static
4876 Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
4878 /* In paths where this returns False, indicating a non-decodable
4879 instruction, there may still be some IR assignments to temporaries
4880 generated. This is inconvenient but harmless, and the post-front-end
4881 IR optimisation pass will just remove them anyway. So there's no
4882 effort made here to tidy it up.
4884 UInt A = (theInstr >> 8) & 0xf;
4885 UInt B = (theInstr >> 20) & 3;
4886 UInt U = (theInstr >> 24) & 1;
4887 UInt P = (theInstr >> 9) & 1;
4888 UInt mreg = get_neon_m_regno(theInstr);
4889 UInt nreg = get_neon_n_regno(theInstr);
4890 UInt dreg = get_neon_d_regno(theInstr);
4891 UInt size = B;
4892 ULong imm;
4893 IRTemp res, arg_m, arg_n, cond, tmp;
4894 IROp cvt, cvt2, cmp, op, op2, sh, add;
4895 switch (A) {
4896 case 0: case 1: case 2: case 3:
4897 /* VADDL, VADDW, VSUBL, VSUBW */
4898 if (dreg & 1)
4899 return False;
4900 dreg >>= 1;
4901 size = B;
4902 switch (size) {
4903 case 0:
4904 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4905 op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
4906 break;
4907 case 1:
4908 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4909 op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
4910 break;
4911 case 2:
4912 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4913 op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
4914 break;
4915 case 3:
4916 return False;
4917 default:
4918 vassert(0);
4920 arg_n = newTemp(Ity_V128);
4921 arg_m = newTemp(Ity_V128);
4922 if (A & 1) {
4923 if (nreg & 1)
4924 return False;
4925 nreg >>= 1;
4926 assign(arg_n, getQReg(nreg));
4927 } else {
4928 assign(arg_n, unop(cvt, getDRegI64(nreg)));
4930 assign(arg_m, unop(cvt, getDRegI64(mreg)));
4931 putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4932 condT);
4933 DIP("v%s%c.%c%d q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
4934 (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
4935 (A & 1) ? 'q' : 'd', nreg, mreg);
4936 return True;
4937 case 4:
4938 /* VADDHN, VRADDHN */
4939 if (mreg & 1)
4940 return False;
4941 mreg >>= 1;
4942 if (nreg & 1)
4943 return False;
4944 nreg >>= 1;
4945 size = B;
4946 switch (size) {
4947 case 0:
4948 op = Iop_Add16x8;
4949 cvt = Iop_NarrowUn16to8x8;
4950 sh = Iop_ShrN16x8;
4951 imm = 1U << 7;
4952 imm = (imm << 16) | imm;
4953 imm = (imm << 32) | imm;
4954 break;
4955 case 1:
4956 op = Iop_Add32x4;
4957 cvt = Iop_NarrowUn32to16x4;
4958 sh = Iop_ShrN32x4;
4959 imm = 1U << 15;
4960 imm = (imm << 32) | imm;
4961 break;
4962 case 2:
4963 op = Iop_Add64x2;
4964 cvt = Iop_NarrowUn64to32x2;
4965 sh = Iop_ShrN64x2;
4966 imm = 1U << 31;
4967 break;
4968 case 3:
4969 return False;
4970 default:
4971 vassert(0);
4973 tmp = newTemp(Ity_V128);
4974 res = newTemp(Ity_V128);
4975 assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
4976 if (U) {
4977 /* VRADDHN */
4978 assign(res, binop(op, mkexpr(tmp),
4979 binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
4980 } else {
4981 assign(res, mkexpr(tmp));
4983 putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
4984 condT);
4985 DIP("v%saddhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
4986 nreg, mreg);
4987 return True;
4988 case 5:
4989 /* VABAL */
4990 if (!((theInstr >> 23) & 1)) {
4991 vpanic("VABA should not be in dis_neon_data_3diff\n");
4993 if (dreg & 1)
4994 return False;
4995 dreg >>= 1;
4996 switch (size) {
4997 case 0:
4998 cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4999 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5000 cvt2 = Iop_Widen8Sto16x8;
5001 op = Iop_Sub16x8;
5002 op2 = Iop_Add16x8;
5003 break;
5004 case 1:
5005 cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5006 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5007 cvt2 = Iop_Widen16Sto32x4;
5008 op = Iop_Sub32x4;
5009 op2 = Iop_Add32x4;
5010 break;
5011 case 2:
5012 cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5013 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5014 cvt2 = Iop_Widen32Sto64x2;
5015 op = Iop_Sub64x2;
5016 op2 = Iop_Add64x2;
5017 break;
5018 case 3:
5019 return False;
5020 default:
5021 vassert(0);
5023 arg_n = newTemp(Ity_V128);
5024 arg_m = newTemp(Ity_V128);
5025 cond = newTemp(Ity_V128);
5026 res = newTemp(Ity_V128);
5027 assign(arg_n, unop(cvt, getDRegI64(nreg)));
5028 assign(arg_m, unop(cvt, getDRegI64(mreg)));
5029 assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5030 getDRegI64(mreg))));
5031 assign(res, binop(op2,
5032 binop(Iop_OrV128,
5033 binop(Iop_AndV128,
5034 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5035 mkexpr(cond)),
5036 binop(Iop_AndV128,
5037 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5038 unop(Iop_NotV128, mkexpr(cond)))),
5039 getQReg(dreg)));
5040 putQReg(dreg, mkexpr(res), condT);
5041 DIP("vabal.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5042 nreg, mreg);
5043 return True;
5044 case 6:
5045 /* VSUBHN, VRSUBHN */
5046 if (mreg & 1)
5047 return False;
5048 mreg >>= 1;
5049 if (nreg & 1)
5050 return False;
5051 nreg >>= 1;
5052 size = B;
5053 switch (size) {
5054 case 0:
5055 op = Iop_Sub16x8;
5056 op2 = Iop_Add16x8;
5057 cvt = Iop_NarrowUn16to8x8;
5058 sh = Iop_ShrN16x8;
5059 imm = 1U << 7;
5060 imm = (imm << 16) | imm;
5061 imm = (imm << 32) | imm;
5062 break;
5063 case 1:
5064 op = Iop_Sub32x4;
5065 op2 = Iop_Add32x4;
5066 cvt = Iop_NarrowUn32to16x4;
5067 sh = Iop_ShrN32x4;
5068 imm = 1U << 15;
5069 imm = (imm << 32) | imm;
5070 break;
5071 case 2:
5072 op = Iop_Sub64x2;
5073 op2 = Iop_Add64x2;
5074 cvt = Iop_NarrowUn64to32x2;
5075 sh = Iop_ShrN64x2;
5076 imm = 1U << 31;
5077 break;
5078 case 3:
5079 return False;
5080 default:
5081 vassert(0);
5083 tmp = newTemp(Ity_V128);
5084 res = newTemp(Ity_V128);
5085 assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
5086 if (U) {
5087 /* VRSUBHN */
5088 assign(res, binop(op2, mkexpr(tmp),
5089 binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
5090 } else {
5091 assign(res, mkexpr(tmp));
5093 putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
5094 condT);
5095 DIP("v%ssubhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
5096 nreg, mreg);
5097 return True;
5098 case 7:
5099 /* VABDL */
5100 if (!((theInstr >> 23) & 1)) {
5101 vpanic("VABL should not be in dis_neon_data_3diff\n");
5103 if (dreg & 1)
5104 return False;
5105 dreg >>= 1;
5106 switch (size) {
5107 case 0:
5108 cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5109 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5110 cvt2 = Iop_Widen8Sto16x8;
5111 op = Iop_Sub16x8;
5112 break;
5113 case 1:
5114 cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5115 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5116 cvt2 = Iop_Widen16Sto32x4;
5117 op = Iop_Sub32x4;
5118 break;
5119 case 2:
5120 cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5121 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5122 cvt2 = Iop_Widen32Sto64x2;
5123 op = Iop_Sub64x2;
5124 break;
5125 case 3:
5126 return False;
5127 default:
5128 vassert(0);
5130 arg_n = newTemp(Ity_V128);
5131 arg_m = newTemp(Ity_V128);
5132 cond = newTemp(Ity_V128);
5133 res = newTemp(Ity_V128);
5134 assign(arg_n, unop(cvt, getDRegI64(nreg)));
5135 assign(arg_m, unop(cvt, getDRegI64(mreg)));
5136 assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5137 getDRegI64(mreg))));
5138 assign(res, binop(Iop_OrV128,
5139 binop(Iop_AndV128,
5140 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5141 mkexpr(cond)),
5142 binop(Iop_AndV128,
5143 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5144 unop(Iop_NotV128, mkexpr(cond)))));
5145 putQReg(dreg, mkexpr(res), condT);
5146 DIP("vabdl.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5147 nreg, mreg);
5148 return True;
5149 case 8:
5150 case 10:
5151 /* VMLAL, VMLSL (integer) */
5152 if (dreg & 1)
5153 return False;
5154 dreg >>= 1;
5155 size = B;
5156 switch (size) {
5157 case 0:
5158 op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5159 op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
5160 break;
5161 case 1:
5162 op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5163 op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5164 break;
5165 case 2:
5166 op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5167 op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5168 break;
5169 case 3:
5170 return False;
5171 default:
5172 vassert(0);
5174 res = newTemp(Ity_V128);
5175 assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
5176 putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5177 DIP("vml%cl.%c%d q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
5178 8 << size, dreg, nreg, mreg);
5179 return True;
5180 case 9:
5181 case 11:
5182 /* VQDMLAL, VQDMLSL */
5183 if (U)
5184 return False;
5185 if (dreg & 1)
5186 return False;
5187 dreg >>= 1;
5188 size = B;
5189 switch (size) {
5190 case 0: case 3:
5191 return False;
5192 case 1:
5193 op = Iop_QDMull16Sx4;
5194 cmp = Iop_CmpEQ16x4;
5195 add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5196 op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5197 imm = 1LL << 15;
5198 imm = (imm << 16) | imm;
5199 imm = (imm << 32) | imm;
5200 break;
5201 case 2:
5202 op = Iop_QDMull32Sx2;
5203 cmp = Iop_CmpEQ32x2;
5204 add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5205 op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5206 imm = 1LL << 31;
5207 imm = (imm << 32) | imm;
5208 break;
5209 default:
5210 vassert(0);
5212 res = newTemp(Ity_V128);
5213 tmp = newTemp(Ity_V128);
5214 assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
5215 assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5216 setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5217 True, condT);
5218 setFlag_QC(binop(Iop_And64,
5219 binop(cmp, getDRegI64(nreg), mkU64(imm)),
5220 binop(cmp, getDRegI64(mreg), mkU64(imm))),
5221 mkU64(0),
5222 False, condT);
5223 putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5224 DIP("vqdml%cl.s%d q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
5225 nreg, mreg);
5226 return True;
5227 case 12:
5228 case 14:
5229 /* VMULL (integer or polynomial) */
5230 if (dreg & 1)
5231 return False;
5232 dreg >>= 1;
5233 size = B;
5234 switch (size) {
5235 case 0:
5236 op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5237 if (P)
5238 op = Iop_PolynomialMull8x8;
5239 break;
5240 case 1:
5241 if (P) return False;
5242 op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5243 break;
5244 case 2:
5245 if (P) return False;
5246 op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5247 break;
5248 case 3:
5249 return False;
5250 default:
5251 vassert(0);
5253 putQReg(dreg, binop(op, getDRegI64(nreg),
5254 getDRegI64(mreg)), condT);
5255 DIP("vmull.%c%d q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
5256 8 << size, dreg, nreg, mreg);
5257 return True;
5258 case 13:
5259 /* VQDMULL */
5260 if (U)
5261 return False;
5262 if (dreg & 1)
5263 return False;
5264 dreg >>= 1;
5265 size = B;
5266 switch (size) {
5267 case 0:
5268 case 3:
5269 return False;
5270 case 1:
5271 op = Iop_QDMull16Sx4;
5272 op2 = Iop_CmpEQ16x4;
5273 imm = 1LL << 15;
5274 imm = (imm << 16) | imm;
5275 imm = (imm << 32) | imm;
5276 break;
5277 case 2:
5278 op = Iop_QDMull32Sx2;
5279 op2 = Iop_CmpEQ32x2;
5280 imm = 1LL << 31;
5281 imm = (imm << 32) | imm;
5282 break;
5283 default:
5284 vassert(0);
5286 putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
5287 condT);
5288 setFlag_QC(binop(Iop_And64,
5289 binop(op2, getDRegI64(nreg), mkU64(imm)),
5290 binop(op2, getDRegI64(mreg), mkU64(imm))),
5291 mkU64(0),
5292 False, condT);
5293 DIP("vqdmull.s%d q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
5294 return True;
5295 default:
5296 return False;
5298 return False;
5301 /* A7.4.3 Two registers and a scalar */
5302 static
5303 Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
5305 # define INSN(_bMax,_bMin) SLICE_UInt(theInstr, (_bMax), (_bMin))
5306 UInt U = INSN(24,24);
5307 UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
5308 UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
5309 UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
5310 UInt size = INSN(21,20);
5311 UInt index;
5312 UInt Q = INSN(24,24);
5314 if (INSN(27,25) != 1 || INSN(23,23) != 1
5315 || INSN(6,6) != 1 || INSN(4,4) != 0)
5316 return False;
5318 /* VMLA, VMLS (scalar) */
5319 if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
5320 IRTemp res, arg_m, arg_n;
5321 IROp dup, get, op, op2, add, sub;
5322 if (Q) {
5323 if ((dreg & 1) || (nreg & 1))
5324 return False;
5325 dreg >>= 1;
5326 nreg >>= 1;
5327 res = newTemp(Ity_V128);
5328 arg_m = newTemp(Ity_V128);
5329 arg_n = newTemp(Ity_V128);
5330 assign(arg_n, getQReg(nreg));
5331 switch(size) {
5332 case 1:
5333 dup = Iop_Dup16x8;
5334 get = Iop_GetElem16x4;
5335 index = mreg >> 3;
5336 mreg &= 7;
5337 break;
5338 case 2:
5339 dup = Iop_Dup32x4;
5340 get = Iop_GetElem32x2;
5341 index = mreg >> 4;
5342 mreg &= 0xf;
5343 break;
5344 case 0:
5345 case 3:
5346 return False;
5347 default:
5348 vassert(0);
5350 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5351 } else {
5352 res = newTemp(Ity_I64);
5353 arg_m = newTemp(Ity_I64);
5354 arg_n = newTemp(Ity_I64);
5355 assign(arg_n, getDRegI64(nreg));
5356 switch(size) {
5357 case 1:
5358 dup = Iop_Dup16x4;
5359 get = Iop_GetElem16x4;
5360 index = mreg >> 3;
5361 mreg &= 7;
5362 break;
5363 case 2:
5364 dup = Iop_Dup32x2;
5365 get = Iop_GetElem32x2;
5366 index = mreg >> 4;
5367 mreg &= 0xf;
5368 break;
5369 case 0:
5370 case 3:
5371 return False;
5372 default:
5373 vassert(0);
5375 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5377 if (INSN(8,8)) {
5378 switch (size) {
5379 case 2:
5380 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5381 add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
5382 sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
5383 break;
5384 case 0:
5385 case 1:
5386 case 3:
5387 return False;
5388 default:
5389 vassert(0);
5391 } else {
5392 switch (size) {
5393 case 1:
5394 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5395 add = Q ? Iop_Add16x8 : Iop_Add16x4;
5396 sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
5397 break;
5398 case 2:
5399 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5400 add = Q ? Iop_Add32x4 : Iop_Add32x2;
5401 sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
5402 break;
5403 case 0:
5404 case 3:
5405 return False;
5406 default:
5407 vassert(0);
5410 op2 = INSN(10,10) ? sub : add;
5411 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5412 if (Q)
5413 putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
5414 condT);
5415 else
5416 putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
5417 condT);
5418 DIP("vml%c.%c%d %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
5419 INSN(8,8) ? 'f' : 'i', 8 << size,
5420 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
5421 return True;
5424 /* VMLAL, VMLSL (scalar) */
5425 if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
5426 IRTemp res, arg_m, arg_n;
5427 IROp dup, get, op, op2, add, sub;
5428 if (dreg & 1)
5429 return False;
5430 dreg >>= 1;
5431 res = newTemp(Ity_V128);
5432 arg_m = newTemp(Ity_I64);
5433 arg_n = newTemp(Ity_I64);
5434 assign(arg_n, getDRegI64(nreg));
5435 switch(size) {
5436 case 1:
5437 dup = Iop_Dup16x4;
5438 get = Iop_GetElem16x4;
5439 index = mreg >> 3;
5440 mreg &= 7;
5441 break;
5442 case 2:
5443 dup = Iop_Dup32x2;
5444 get = Iop_GetElem32x2;
5445 index = mreg >> 4;
5446 mreg &= 0xf;
5447 break;
5448 case 0:
5449 case 3:
5450 return False;
5451 default:
5452 vassert(0);
5454 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5455 switch (size) {
5456 case 1:
5457 op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5458 add = Iop_Add32x4;
5459 sub = Iop_Sub32x4;
5460 break;
5461 case 2:
5462 op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5463 add = Iop_Add64x2;
5464 sub = Iop_Sub64x2;
5465 break;
5466 case 0:
5467 case 3:
5468 return False;
5469 default:
5470 vassert(0);
5472 op2 = INSN(10,10) ? sub : add;
5473 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5474 putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5475 DIP("vml%cl.%c%d q%u, d%u, d%u[%u]\n",
5476 INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
5477 8 << size, dreg, nreg, mreg, index);
5478 return True;
5481 /* VQDMLAL, VQDMLSL (scalar) */
5482 if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
5483 IRTemp res, arg_m, arg_n, tmp;
5484 IROp dup, get, op, op2, add, cmp;
5485 UInt P = INSN(10,10);
5486 ULong imm;
5487 if (dreg & 1)
5488 return False;
5489 dreg >>= 1;
5490 res = newTemp(Ity_V128);
5491 arg_m = newTemp(Ity_I64);
5492 arg_n = newTemp(Ity_I64);
5493 assign(arg_n, getDRegI64(nreg));
5494 switch(size) {
5495 case 1:
5496 dup = Iop_Dup16x4;
5497 get = Iop_GetElem16x4;
5498 index = mreg >> 3;
5499 mreg &= 7;
5500 break;
5501 case 2:
5502 dup = Iop_Dup32x2;
5503 get = Iop_GetElem32x2;
5504 index = mreg >> 4;
5505 mreg &= 0xf;
5506 break;
5507 case 0:
5508 case 3:
5509 return False;
5510 default:
5511 vassert(0);
5513 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5514 switch (size) {
5515 case 0:
5516 case 3:
5517 return False;
5518 case 1:
5519 op = Iop_QDMull16Sx4;
5520 cmp = Iop_CmpEQ16x4;
5521 add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5522 op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5523 imm = 1LL << 15;
5524 imm = (imm << 16) | imm;
5525 imm = (imm << 32) | imm;
5526 break;
5527 case 2:
5528 op = Iop_QDMull32Sx2;
5529 cmp = Iop_CmpEQ32x2;
5530 add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5531 op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5532 imm = 1LL << 31;
5533 imm = (imm << 32) | imm;
5534 break;
5535 default:
5536 vassert(0);
5538 res = newTemp(Ity_V128);
5539 tmp = newTemp(Ity_V128);
5540 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5541 assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5542 setFlag_QC(binop(Iop_And64,
5543 binop(cmp, mkexpr(arg_n), mkU64(imm)),
5544 binop(cmp, mkexpr(arg_m), mkU64(imm))),
5545 mkU64(0),
5546 False, condT);
5547 setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5548 True, condT);
5549 putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5550 DIP("vqdml%cl.s%d q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
5551 dreg, nreg, mreg, index);
5552 return True;
5555 /* VMUL (by scalar) */
5556 if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
5557 IRTemp res, arg_m, arg_n;
5558 IROp dup, get, op;
5559 if (Q) {
5560 if ((dreg & 1) || (nreg & 1))
5561 return False;
5562 dreg >>= 1;
5563 nreg >>= 1;
5564 res = newTemp(Ity_V128);
5565 arg_m = newTemp(Ity_V128);
5566 arg_n = newTemp(Ity_V128);
5567 assign(arg_n, getQReg(nreg));
5568 switch(size) {
5569 case 1:
5570 dup = Iop_Dup16x8;
5571 get = Iop_GetElem16x4;
5572 index = mreg >> 3;
5573 mreg &= 7;
5574 break;
5575 case 2:
5576 dup = Iop_Dup32x4;
5577 get = Iop_GetElem32x2;
5578 index = mreg >> 4;
5579 mreg &= 0xf;
5580 break;
5581 case 0:
5582 case 3:
5583 return False;
5584 default:
5585 vassert(0);
5587 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5588 } else {
5589 res = newTemp(Ity_I64);
5590 arg_m = newTemp(Ity_I64);
5591 arg_n = newTemp(Ity_I64);
5592 assign(arg_n, getDRegI64(nreg));
5593 switch(size) {
5594 case 1:
5595 dup = Iop_Dup16x4;
5596 get = Iop_GetElem16x4;
5597 index = mreg >> 3;
5598 mreg &= 7;
5599 break;
5600 case 2:
5601 dup = Iop_Dup32x2;
5602 get = Iop_GetElem32x2;
5603 index = mreg >> 4;
5604 mreg &= 0xf;
5605 break;
5606 case 0:
5607 case 3:
5608 return False;
5609 default:
5610 vassert(0);
5612 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5614 if (INSN(8,8)) {
5615 switch (size) {
5616 case 2:
5617 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5618 break;
5619 case 0:
5620 case 1:
5621 case 3:
5622 return False;
5623 default:
5624 vassert(0);
5626 } else {
5627 switch (size) {
5628 case 1:
5629 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5630 break;
5631 case 2:
5632 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5633 break;
5634 case 0:
5635 case 3:
5636 return False;
5637 default:
5638 vassert(0);
5641 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5642 if (Q)
5643 putQReg(dreg, mkexpr(res), condT);
5644 else
5645 putDRegI64(dreg, mkexpr(res), condT);
5646 DIP("vmul.%c%d %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
5647 8 << size, Q ? 'q' : 'd', dreg,
5648 Q ? 'q' : 'd', nreg, mreg, index);
5649 return True;
5652 /* VMULL (scalar) */
5653 if (INSN(11,8) == BITS4(1,0,1,0)) {
5654 IRTemp res, arg_m, arg_n;
5655 IROp dup, get, op;
5656 if (dreg & 1)
5657 return False;
5658 dreg >>= 1;
5659 res = newTemp(Ity_V128);
5660 arg_m = newTemp(Ity_I64);
5661 arg_n = newTemp(Ity_I64);
5662 assign(arg_n, getDRegI64(nreg));
5663 switch(size) {
5664 case 1:
5665 dup = Iop_Dup16x4;
5666 get = Iop_GetElem16x4;
5667 index = mreg >> 3;
5668 mreg &= 7;
5669 break;
5670 case 2:
5671 dup = Iop_Dup32x2;
5672 get = Iop_GetElem32x2;
5673 index = mreg >> 4;
5674 mreg &= 0xf;
5675 break;
5676 case 0:
5677 case 3:
5678 return False;
5679 default:
5680 vassert(0);
5682 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5683 switch (size) {
5684 case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
5685 case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
5686 case 0: case 3: return False;
5687 default: vassert(0);
5689 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5690 putQReg(dreg, mkexpr(res), condT);
5691 DIP("vmull.%c%d q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
5692 nreg, mreg, index);
5693 return True;
5696 /* VQDMULL */
5697 if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
5698 IROp op ,op2, dup, get;
5699 ULong imm;
5700 IRTemp arg_m, arg_n;
5701 if (dreg & 1)
5702 return False;
5703 dreg >>= 1;
5704 arg_m = newTemp(Ity_I64);
5705 arg_n = newTemp(Ity_I64);
5706 assign(arg_n, getDRegI64(nreg));
5707 switch(size) {
5708 case 1:
5709 dup = Iop_Dup16x4;
5710 get = Iop_GetElem16x4;
5711 index = mreg >> 3;
5712 mreg &= 7;
5713 break;
5714 case 2:
5715 dup = Iop_Dup32x2;
5716 get = Iop_GetElem32x2;
5717 index = mreg >> 4;
5718 mreg &= 0xf;
5719 break;
5720 case 0:
5721 case 3:
5722 return False;
5723 default:
5724 vassert(0);
5726 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5727 switch (size) {
5728 case 0:
5729 case 3:
5730 return False;
5731 case 1:
5732 op = Iop_QDMull16Sx4;
5733 op2 = Iop_CmpEQ16x4;
5734 imm = 1LL << 15;
5735 imm = (imm << 16) | imm;
5736 imm = (imm << 32) | imm;
5737 break;
5738 case 2:
5739 op = Iop_QDMull32Sx2;
5740 op2 = Iop_CmpEQ32x2;
5741 imm = 1LL << 31;
5742 imm = (imm << 32) | imm;
5743 break;
5744 default:
5745 vassert(0);
5747 putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5748 condT);
5749 setFlag_QC(binop(Iop_And64,
5750 binop(op2, mkexpr(arg_n), mkU64(imm)),
5751 binop(op2, mkexpr(arg_m), mkU64(imm))),
5752 mkU64(0),
5753 False, condT);
5754 DIP("vqdmull.s%d q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
5755 index);
5756 return True;
5759 /* VQDMULH */
5760 if (INSN(11,8) == BITS4(1,1,0,0)) {
5761 IROp op ,op2, dup, get;
5762 ULong imm;
5763 IRTemp res, arg_m, arg_n;
5764 if (Q) {
5765 if ((dreg & 1) || (nreg & 1))
5766 return False;
5767 dreg >>= 1;
5768 nreg >>= 1;
5769 res = newTemp(Ity_V128);
5770 arg_m = newTemp(Ity_V128);
5771 arg_n = newTemp(Ity_V128);
5772 assign(arg_n, getQReg(nreg));
5773 switch(size) {
5774 case 1:
5775 dup = Iop_Dup16x8;
5776 get = Iop_GetElem16x4;
5777 index = mreg >> 3;
5778 mreg &= 7;
5779 break;
5780 case 2:
5781 dup = Iop_Dup32x4;
5782 get = Iop_GetElem32x2;
5783 index = mreg >> 4;
5784 mreg &= 0xf;
5785 break;
5786 case 0:
5787 case 3:
5788 return False;
5789 default:
5790 vassert(0);
5792 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5793 } else {
5794 res = newTemp(Ity_I64);
5795 arg_m = newTemp(Ity_I64);
5796 arg_n = newTemp(Ity_I64);
5797 assign(arg_n, getDRegI64(nreg));
5798 switch(size) {
5799 case 1:
5800 dup = Iop_Dup16x4;
5801 get = Iop_GetElem16x4;
5802 index = mreg >> 3;
5803 mreg &= 7;
5804 break;
5805 case 2:
5806 dup = Iop_Dup32x2;
5807 get = Iop_GetElem32x2;
5808 index = mreg >> 4;
5809 mreg &= 0xf;
5810 break;
5811 case 0:
5812 case 3:
5813 return False;
5814 default:
5815 vassert(0);
5817 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5819 switch (size) {
5820 case 0:
5821 case 3:
5822 return False;
5823 case 1:
5824 op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
5825 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5826 imm = 1LL << 15;
5827 imm = (imm << 16) | imm;
5828 imm = (imm << 32) | imm;
5829 break;
5830 case 2:
5831 op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
5832 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5833 imm = 1LL << 31;
5834 imm = (imm << 32) | imm;
5835 break;
5836 default:
5837 vassert(0);
5839 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5840 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5841 binop(op2, mkexpr(arg_n),
5842 Q ? mkU128(imm) : mkU64(imm)),
5843 binop(op2, mkexpr(arg_m),
5844 Q ? mkU128(imm) : mkU64(imm))),
5845 Q ? mkU128(0) : mkU64(0),
5846 Q, condT);
5847 if (Q)
5848 putQReg(dreg, mkexpr(res), condT);
5849 else
5850 putDRegI64(dreg, mkexpr(res), condT);
5851 DIP("vqdmulh.s%d %c%u, %c%u, d%u[%u]\n",
5852 8 << size, Q ? 'q' : 'd', dreg,
5853 Q ? 'q' : 'd', nreg, mreg, index);
5854 return True;
5857 /* VQRDMULH (scalar) */
5858 if (INSN(11,8) == BITS4(1,1,0,1)) {
5859 IROp op ,op2, dup, get;
5860 ULong imm;
5861 IRTemp res, arg_m, arg_n;
5862 if (Q) {
5863 if ((dreg & 1) || (nreg & 1))
5864 return False;
5865 dreg >>= 1;
5866 nreg >>= 1;
5867 res = newTemp(Ity_V128);
5868 arg_m = newTemp(Ity_V128);
5869 arg_n = newTemp(Ity_V128);
5870 assign(arg_n, getQReg(nreg));
5871 switch(size) {
5872 case 1:
5873 dup = Iop_Dup16x8;
5874 get = Iop_GetElem16x4;
5875 index = mreg >> 3;
5876 mreg &= 7;
5877 break;
5878 case 2:
5879 dup = Iop_Dup32x4;
5880 get = Iop_GetElem32x2;
5881 index = mreg >> 4;
5882 mreg &= 0xf;
5883 break;
5884 case 0:
5885 case 3:
5886 return False;
5887 default:
5888 vassert(0);
5890 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5891 } else {
5892 res = newTemp(Ity_I64);
5893 arg_m = newTemp(Ity_I64);
5894 arg_n = newTemp(Ity_I64);
5895 assign(arg_n, getDRegI64(nreg));
5896 switch(size) {
5897 case 1:
5898 dup = Iop_Dup16x4;
5899 get = Iop_GetElem16x4;
5900 index = mreg >> 3;
5901 mreg &= 7;
5902 break;
5903 case 2:
5904 dup = Iop_Dup32x2;
5905 get = Iop_GetElem32x2;
5906 index = mreg >> 4;
5907 mreg &= 0xf;
5908 break;
5909 case 0:
5910 case 3:
5911 return False;
5912 default:
5913 vassert(0);
5915 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5917 switch (size) {
5918 case 0:
5919 case 3:
5920 return False;
5921 case 1:
5922 op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
5923 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5924 imm = 1LL << 15;
5925 imm = (imm << 16) | imm;
5926 imm = (imm << 32) | imm;
5927 break;
5928 case 2:
5929 op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
5930 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5931 imm = 1LL << 31;
5932 imm = (imm << 32) | imm;
5933 break;
5934 default:
5935 vassert(0);
5937 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5938 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5939 binop(op2, mkexpr(arg_n),
5940 Q ? mkU128(imm) : mkU64(imm)),
5941 binop(op2, mkexpr(arg_m),
5942 Q ? mkU128(imm) : mkU64(imm))),
5943 Q ? mkU128(0) : mkU64(0),
5944 Q, condT);
5945 if (Q)
5946 putQReg(dreg, mkexpr(res), condT);
5947 else
5948 putDRegI64(dreg, mkexpr(res), condT);
5949 DIP("vqrdmulh.s%d %c%u, %c%u, d%u[%u]\n",
5950 8 << size, Q ? 'q' : 'd', dreg,
5951 Q ? 'q' : 'd', nreg, mreg, index);
5952 return True;
5955 return False;
5956 # undef INSN
5959 /* A7.4.4 Two registers and a shift amount */
5960 static
5961 Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
5963 UInt A = (theInstr >> 8) & 0xf;
5964 UInt B = (theInstr >> 6) & 1;
5965 UInt L = (theInstr >> 7) & 1;
5966 UInt U = (theInstr >> 24) & 1;
5967 UInt Q = B;
5968 UInt imm6 = (theInstr >> 16) & 0x3f;
5969 UInt shift_imm;
5970 UInt size = 4;
5971 UInt tmp;
5972 UInt mreg = get_neon_m_regno(theInstr);
5973 UInt dreg = get_neon_d_regno(theInstr);
5974 ULong imm = 0;
5975 IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
5976 IRTemp reg_m, res, mask;
5978 if (L == 0 && ((theInstr >> 19) & 7) == 0)
5979 /* It is one reg and immediate */
5980 return False;
5982 tmp = (L << 6) | imm6;
5983 if (tmp & 0x40) {
5984 size = 3;
5985 shift_imm = 64 - imm6;
5986 } else if (tmp & 0x20) {
5987 size = 2;
5988 shift_imm = 64 - imm6;
5989 } else if (tmp & 0x10) {
5990 size = 1;
5991 shift_imm = 32 - imm6;
5992 } else if (tmp & 0x8) {
5993 size = 0;
5994 shift_imm = 16 - imm6;
5995 } else {
5996 return False;
5999 switch (A) {
6000 case 3:
6001 case 2:
6002 /* VRSHR, VRSRA */
6003 if (shift_imm > 0) {
6004 IRExpr *imm_val;
6005 imm = 1L;
6006 switch (size) {
6007 case 0:
6008 imm = (imm << 8) | imm;
6009 /* fall through */
6010 case 1:
6011 imm = (imm << 16) | imm;
6012 /* fall through */
6013 case 2:
6014 imm = (imm << 32) | imm;
6015 /* fall through */
6016 case 3:
6017 break;
6018 default:
6019 vassert(0);
6021 if (Q) {
6022 reg_m = newTemp(Ity_V128);
6023 res = newTemp(Ity_V128);
6024 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6025 assign(reg_m, getQReg(mreg));
6026 switch (size) {
6027 case 0:
6028 add = Iop_Add8x16;
6029 op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6030 break;
6031 case 1:
6032 add = Iop_Add16x8;
6033 op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6034 break;
6035 case 2:
6036 add = Iop_Add32x4;
6037 op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6038 break;
6039 case 3:
6040 add = Iop_Add64x2;
6041 op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6042 break;
6043 default:
6044 vassert(0);
6046 } else {
6047 reg_m = newTemp(Ity_I64);
6048 res = newTemp(Ity_I64);
6049 imm_val = mkU64(imm);
6050 assign(reg_m, getDRegI64(mreg));
6051 switch (size) {
6052 case 0:
6053 add = Iop_Add8x8;
6054 op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
6055 break;
6056 case 1:
6057 add = Iop_Add16x4;
6058 op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6059 break;
6060 case 2:
6061 add = Iop_Add32x2;
6062 op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6063 break;
6064 case 3:
6065 add = Iop_Add64;
6066 op = U ? Iop_Shr64 : Iop_Sar64;
6067 break;
6068 default:
6069 vassert(0);
6072 assign(res,
6073 binop(add,
6074 binop(op,
6075 mkexpr(reg_m),
6076 mkU8(shift_imm)),
6077 binop(Q ? Iop_AndV128 : Iop_And64,
6078 binop(op,
6079 mkexpr(reg_m),
6080 mkU8(shift_imm - 1)),
6081 imm_val)));
6082 } else {
6083 if (Q) {
6084 res = newTemp(Ity_V128);
6085 assign(res, getQReg(mreg));
6086 } else {
6087 res = newTemp(Ity_I64);
6088 assign(res, getDRegI64(mreg));
6091 if (A == 3) {
6092 if (Q) {
6093 putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6094 condT);
6095 } else {
6096 putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6097 condT);
6099 DIP("vrsra.%c%d %c%u, %c%u, #%u\n",
6100 U ? 'u' : 's', 8 << size,
6101 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6102 } else {
6103 if (Q) {
6104 putQReg(dreg, mkexpr(res), condT);
6105 } else {
6106 putDRegI64(dreg, mkexpr(res), condT);
6108 DIP("vrshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6109 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6111 return True;
6112 case 1:
6113 case 0:
6114 /* VSHR, VSRA */
6115 if (Q) {
6116 reg_m = newTemp(Ity_V128);
6117 assign(reg_m, getQReg(mreg));
6118 res = newTemp(Ity_V128);
6119 } else {
6120 reg_m = newTemp(Ity_I64);
6121 assign(reg_m, getDRegI64(mreg));
6122 res = newTemp(Ity_I64);
6124 if (Q) {
6125 switch (size) {
6126 case 0:
6127 op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6128 add = Iop_Add8x16;
6129 break;
6130 case 1:
6131 op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6132 add = Iop_Add16x8;
6133 break;
6134 case 2:
6135 op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6136 add = Iop_Add32x4;
6137 break;
6138 case 3:
6139 op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6140 add = Iop_Add64x2;
6141 break;
6142 default:
6143 vassert(0);
6145 } else {
6146 switch (size) {
6147 case 0:
6148 op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
6149 add = Iop_Add8x8;
6150 break;
6151 case 1:
6152 op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6153 add = Iop_Add16x4;
6154 break;
6155 case 2:
6156 op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6157 add = Iop_Add32x2;
6158 break;
6159 case 3:
6160 op = U ? Iop_Shr64 : Iop_Sar64;
6161 add = Iop_Add64;
6162 break;
6163 default:
6164 vassert(0);
6167 assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6168 if (A == 1) {
6169 if (Q) {
6170 putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6171 condT);
6172 } else {
6173 putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6174 condT);
6176 DIP("vsra.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6177 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6178 } else {
6179 if (Q) {
6180 putQReg(dreg, mkexpr(res), condT);
6181 } else {
6182 putDRegI64(dreg, mkexpr(res), condT);
6184 DIP("vshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6185 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6187 return True;
6188 case 4:
6189 /* VSRI */
6190 if (!U)
6191 return False;
6192 if (Q) {
6193 res = newTemp(Ity_V128);
6194 mask = newTemp(Ity_V128);
6195 } else {
6196 res = newTemp(Ity_I64);
6197 mask = newTemp(Ity_I64);
6199 switch (size) {
6200 case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
6201 case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
6202 case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
6203 case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
6204 default: vassert(0);
6206 if (Q) {
6207 assign(mask, binop(op, binop(Iop_64HLtoV128,
6208 mkU64(0xFFFFFFFFFFFFFFFFLL),
6209 mkU64(0xFFFFFFFFFFFFFFFFLL)),
6210 mkU8(shift_imm)));
6211 assign(res, binop(Iop_OrV128,
6212 binop(Iop_AndV128,
6213 getQReg(dreg),
6214 unop(Iop_NotV128,
6215 mkexpr(mask))),
6216 binop(op,
6217 getQReg(mreg),
6218 mkU8(shift_imm))));
6219 putQReg(dreg, mkexpr(res), condT);
6220 } else {
6221 assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6222 mkU8(shift_imm)));
6223 assign(res, binop(Iop_Or64,
6224 binop(Iop_And64,
6225 getDRegI64(dreg),
6226 unop(Iop_Not64,
6227 mkexpr(mask))),
6228 binop(op,
6229 getDRegI64(mreg),
6230 mkU8(shift_imm))));
6231 putDRegI64(dreg, mkexpr(res), condT);
6233 DIP("vsri.%d %c%u, %c%u, #%u\n",
6234 8 << size, Q ? 'q' : 'd', dreg,
6235 Q ? 'q' : 'd', mreg, shift_imm);
6236 return True;
6237 case 5:
6238 if (U) {
6239 /* VSLI */
6240 shift_imm = 8 * (1 << size) - shift_imm;
6241 if (Q) {
6242 res = newTemp(Ity_V128);
6243 mask = newTemp(Ity_V128);
6244 } else {
6245 res = newTemp(Ity_I64);
6246 mask = newTemp(Ity_I64);
6248 switch (size) {
6249 case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6250 case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6251 case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6252 case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6253 default: vassert(0);
6255 if (Q) {
6256 assign(mask, binop(op, binop(Iop_64HLtoV128,
6257 mkU64(0xFFFFFFFFFFFFFFFFLL),
6258 mkU64(0xFFFFFFFFFFFFFFFFLL)),
6259 mkU8(shift_imm)));
6260 assign(res, binop(Iop_OrV128,
6261 binop(Iop_AndV128,
6262 getQReg(dreg),
6263 unop(Iop_NotV128,
6264 mkexpr(mask))),
6265 binop(op,
6266 getQReg(mreg),
6267 mkU8(shift_imm))));
6268 putQReg(dreg, mkexpr(res), condT);
6269 } else {
6270 assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6271 mkU8(shift_imm)));
6272 assign(res, binop(Iop_Or64,
6273 binop(Iop_And64,
6274 getDRegI64(dreg),
6275 unop(Iop_Not64,
6276 mkexpr(mask))),
6277 binop(op,
6278 getDRegI64(mreg),
6279 mkU8(shift_imm))));
6280 putDRegI64(dreg, mkexpr(res), condT);
6282 DIP("vsli.%d %c%u, %c%u, #%u\n",
6283 8 << size, Q ? 'q' : 'd', dreg,
6284 Q ? 'q' : 'd', mreg, shift_imm);
6285 return True;
6286 } else {
6287 /* VSHL #imm */
6288 shift_imm = 8 * (1 << size) - shift_imm;
6289 if (Q) {
6290 res = newTemp(Ity_V128);
6291 } else {
6292 res = newTemp(Ity_I64);
6294 switch (size) {
6295 case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6296 case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6297 case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6298 case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6299 default: vassert(0);
6301 assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
6302 mkU8(shift_imm)));
6303 if (Q) {
6304 putQReg(dreg, mkexpr(res), condT);
6305 } else {
6306 putDRegI64(dreg, mkexpr(res), condT);
6308 DIP("vshl.i%d %c%u, %c%u, #%u\n",
6309 8 << size, Q ? 'q' : 'd', dreg,
6310 Q ? 'q' : 'd', mreg, shift_imm);
6311 return True;
6313 break;
6314 case 6:
6315 case 7:
6316 /* VQSHL, VQSHLU */
6317 shift_imm = 8 * (1 << size) - shift_imm;
6318 if (U) {
6319 if (A & 1) {
6320 switch (size) {
6321 case 0:
6322 op = Q ? Iop_QShlNsatUU8x16 : Iop_QShlNsatUU8x8;
6323 op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6324 break;
6325 case 1:
6326 op = Q ? Iop_QShlNsatUU16x8 : Iop_QShlNsatUU16x4;
6327 op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6328 break;
6329 case 2:
6330 op = Q ? Iop_QShlNsatUU32x4 : Iop_QShlNsatUU32x2;
6331 op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6332 break;
6333 case 3:
6334 op = Q ? Iop_QShlNsatUU64x2 : Iop_QShlNsatUU64x1;
6335 op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6336 break;
6337 default:
6338 vassert(0);
6340 DIP("vqshl.u%d %c%u, %c%u, #%u\n",
6341 8 << size,
6342 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6343 } else {
6344 switch (size) {
6345 case 0:
6346 op = Q ? Iop_QShlNsatSU8x16 : Iop_QShlNsatSU8x8;
6347 op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6348 break;
6349 case 1:
6350 op = Q ? Iop_QShlNsatSU16x8 : Iop_QShlNsatSU16x4;
6351 op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6352 break;
6353 case 2:
6354 op = Q ? Iop_QShlNsatSU32x4 : Iop_QShlNsatSU32x2;
6355 op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6356 break;
6357 case 3:
6358 op = Q ? Iop_QShlNsatSU64x2 : Iop_QShlNsatSU64x1;
6359 op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6360 break;
6361 default:
6362 vassert(0);
6364 DIP("vqshlu.s%d %c%u, %c%u, #%u\n",
6365 8 << size,
6366 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6368 } else {
6369 if (!(A & 1))
6370 return False;
6371 switch (size) {
6372 case 0:
6373 op = Q ? Iop_QShlNsatSS8x16 : Iop_QShlNsatSS8x8;
6374 op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
6375 break;
6376 case 1:
6377 op = Q ? Iop_QShlNsatSS16x8 : Iop_QShlNsatSS16x4;
6378 op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
6379 break;
6380 case 2:
6381 op = Q ? Iop_QShlNsatSS32x4 : Iop_QShlNsatSS32x2;
6382 op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
6383 break;
6384 case 3:
6385 op = Q ? Iop_QShlNsatSS64x2 : Iop_QShlNsatSS64x1;
6386 op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
6387 break;
6388 default:
6389 vassert(0);
6391 DIP("vqshl.s%d %c%u, %c%u, #%u\n",
6392 8 << size,
6393 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6395 if (Q) {
6396 tmp = newTemp(Ity_V128);
6397 res = newTemp(Ity_V128);
6398 reg_m = newTemp(Ity_V128);
6399 assign(reg_m, getQReg(mreg));
6400 } else {
6401 tmp = newTemp(Ity_I64);
6402 res = newTemp(Ity_I64);
6403 reg_m = newTemp(Ity_I64);
6404 assign(reg_m, getDRegI64(mreg));
6406 assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6407 assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
6408 setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
6409 if (Q)
6410 putQReg(dreg, mkexpr(res), condT);
6411 else
6412 putDRegI64(dreg, mkexpr(res), condT);
6413 return True;
6414 case 8:
6415 if (!U) {
6416 if (L == 1)
6417 return False;
6418 size++;
6419 dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6420 mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
6421 if (mreg & 1)
6422 return False;
6423 mreg >>= 1;
6424 if (!B) {
6425 /* VSHRN*/
6426 IROp narOp;
6427 reg_m = newTemp(Ity_V128);
6428 assign(reg_m, getQReg(mreg));
6429 res = newTemp(Ity_I64);
6430 switch (size) {
6431 case 1:
6432 op = Iop_ShrN16x8;
6433 narOp = Iop_NarrowUn16to8x8;
6434 break;
6435 case 2:
6436 op = Iop_ShrN32x4;
6437 narOp = Iop_NarrowUn32to16x4;
6438 break;
6439 case 3:
6440 op = Iop_ShrN64x2;
6441 narOp = Iop_NarrowUn64to32x2;
6442 break;
6443 default:
6444 vassert(0);
6446 assign(res, unop(narOp,
6447 binop(op,
6448 mkexpr(reg_m),
6449 mkU8(shift_imm))));
6450 putDRegI64(dreg, mkexpr(res), condT);
6451 DIP("vshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6452 shift_imm);
6453 return True;
6454 } else {
6455 /* VRSHRN */
6456 IROp addOp, shOp, narOp;
6457 IRExpr *imm_val;
6458 reg_m = newTemp(Ity_V128);
6459 assign(reg_m, getQReg(mreg));
6460 res = newTemp(Ity_I64);
6461 imm = 1L;
6462 switch (size) {
6463 case 0: imm = (imm << 8) | imm; /* fall through */
6464 case 1: imm = (imm << 16) | imm; /* fall through */
6465 case 2: imm = (imm << 32) | imm; /* fall through */
6466 case 3: break;
6467 default: vassert(0);
6469 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6470 switch (size) {
6471 case 1:
6472 addOp = Iop_Add16x8;
6473 shOp = Iop_ShrN16x8;
6474 narOp = Iop_NarrowUn16to8x8;
6475 break;
6476 case 2:
6477 addOp = Iop_Add32x4;
6478 shOp = Iop_ShrN32x4;
6479 narOp = Iop_NarrowUn32to16x4;
6480 break;
6481 case 3:
6482 addOp = Iop_Add64x2;
6483 shOp = Iop_ShrN64x2;
6484 narOp = Iop_NarrowUn64to32x2;
6485 break;
6486 default:
6487 vassert(0);
6489 assign(res, unop(narOp,
6490 binop(addOp,
6491 binop(shOp,
6492 mkexpr(reg_m),
6493 mkU8(shift_imm)),
6494 binop(Iop_AndV128,
6495 binop(shOp,
6496 mkexpr(reg_m),
6497 mkU8(shift_imm - 1)),
6498 imm_val))));
6499 putDRegI64(dreg, mkexpr(res), condT);
6500 if (shift_imm == 0) {
6501 DIP("vmov%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6502 shift_imm);
6503 } else {
6504 DIP("vrshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6505 shift_imm);
6507 return True;
6510 /* else fall through */
6511 case 9:
6512 dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6513 mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
6514 if (mreg & 1)
6515 return False;
6516 mreg >>= 1;
6517 size++;
6518 if ((theInstr >> 8) & 1) {
6519 switch (size) {
6520 case 1:
6521 op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6522 cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
6523 cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6524 break;
6525 case 2:
6526 op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6527 cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
6528 cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6529 break;
6530 case 3:
6531 op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6532 cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
6533 cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6534 break;
6535 default:
6536 vassert(0);
6538 DIP("vq%sshrn.%c%d d%u, q%u, #%u\n", B ? "r" : "",
6539 U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
6540 } else {
6541 vassert(U);
6542 switch (size) {
6543 case 1:
6544 op = Iop_SarN16x8;
6545 cvt = Iop_QNarrowUn16Sto8Ux8;
6546 cvt2 = Iop_Widen8Uto16x8;
6547 break;
6548 case 2:
6549 op = Iop_SarN32x4;
6550 cvt = Iop_QNarrowUn32Sto16Ux4;
6551 cvt2 = Iop_Widen16Uto32x4;
6552 break;
6553 case 3:
6554 op = Iop_SarN64x2;
6555 cvt = Iop_QNarrowUn64Sto32Ux2;
6556 cvt2 = Iop_Widen32Uto64x2;
6557 break;
6558 default:
6559 vassert(0);
6561 DIP("vq%sshrun.s%d d%u, q%u, #%u\n", B ? "r" : "",
6562 8 << size, dreg, mreg, shift_imm);
6564 if (B) {
6565 if (shift_imm > 0) {
6566 imm = 1;
6567 switch (size) {
6568 case 1: imm = (imm << 16) | imm; /* fall through */
6569 case 2: imm = (imm << 32) | imm; /* fall through */
6570 case 3: break;
6571 case 0: default: vassert(0);
6573 switch (size) {
6574 case 1: add = Iop_Add16x8; break;
6575 case 2: add = Iop_Add32x4; break;
6576 case 3: add = Iop_Add64x2; break;
6577 case 0: default: vassert(0);
6581 reg_m = newTemp(Ity_V128);
6582 res = newTemp(Ity_V128);
6583 assign(reg_m, getQReg(mreg));
6584 if (B) {
6585 /* VQRSHRN, VQRSHRUN */
6586 assign(res, binop(add,
6587 binop(op, mkexpr(reg_m), mkU8(shift_imm)),
6588 binop(Iop_AndV128,
6589 binop(op,
6590 mkexpr(reg_m),
6591 mkU8(shift_imm - 1)),
6592 mkU128(imm))));
6593 } else {
6594 /* VQSHRN, VQSHRUN */
6595 assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6597 setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
6598 True, condT);
6599 putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
6600 return True;
6601 case 10:
6602 /* VSHLL
6603 VMOVL ::= VSHLL #0 */
6604 if (B)
6605 return False;
6606 if (dreg & 1)
6607 return False;
6608 dreg >>= 1;
6609 shift_imm = (8 << size) - shift_imm;
6610 res = newTemp(Ity_V128);
6611 switch (size) {
6612 case 0:
6613 op = Iop_ShlN16x8;
6614 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6615 break;
6616 case 1:
6617 op = Iop_ShlN32x4;
6618 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6619 break;
6620 case 2:
6621 op = Iop_ShlN64x2;
6622 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6623 break;
6624 case 3:
6625 return False;
6626 default:
6627 vassert(0);
6629 assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
6630 putQReg(dreg, mkexpr(res), condT);
6631 if (shift_imm == 0) {
6632 DIP("vmovl.%c%d q%u, d%u\n", U ? 'u' : 's', 8 << size,
6633 dreg, mreg);
6634 } else {
6635 DIP("vshll.%c%d q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
6636 dreg, mreg, shift_imm);
6638 return True;
6639 case 14:
6640 case 15:
6641 /* VCVT floating-point <-> fixed-point */
6642 if ((theInstr >> 8) & 1) {
6643 if (U) {
6644 op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
6645 } else {
6646 op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
6648 DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6649 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6650 64 - ((theInstr >> 16) & 0x3f));
6651 } else {
6652 if (U) {
6653 op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
6654 } else {
6655 op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
6657 DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6658 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6659 64 - ((theInstr >> 16) & 0x3f));
6661 if (((theInstr >> 21) & 1) == 0)
6662 return False;
6663 if (Q) {
6664 putQReg(dreg, binop(op, getQReg(mreg),
6665 mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6666 } else {
6667 putDRegI64(dreg, binop(op, getDRegI64(mreg),
6668 mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6670 return True;
6671 default:
6672 return False;
6675 return False;
6678 /* A7.4.5 Two registers, miscellaneous */
6679 static
6680 Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
6682 UInt A = (theInstr >> 16) & 3;
6683 UInt B = (theInstr >> 6) & 0x1f;
6684 UInt Q = (theInstr >> 6) & 1;
6685 UInt U = (theInstr >> 24) & 1;
6686 UInt size = (theInstr >> 18) & 3;
6687 UInt dreg = get_neon_d_regno(theInstr);
6688 UInt mreg = get_neon_m_regno(theInstr);
6689 UInt F = (theInstr >> 10) & 1;
6690 IRTemp arg_d = IRTemp_INVALID;
6691 IRTemp arg_m = IRTemp_INVALID;
6692 IRTemp res = IRTemp_INVALID;
6693 switch (A) {
6694 case 0:
6695 if (Q) {
6696 arg_m = newTemp(Ity_V128);
6697 res = newTemp(Ity_V128);
6698 assign(arg_m, getQReg(mreg));
6699 } else {
6700 arg_m = newTemp(Ity_I64);
6701 res = newTemp(Ity_I64);
6702 assign(arg_m, getDRegI64(mreg));
6704 switch (B >> 1) {
6705 case 0: {
6706 /* VREV64 */
6707 IROp op;
6708 switch (size) {
6709 case 0:
6710 op = Q ? Iop_Reverse8sIn64_x2 : Iop_Reverse8sIn64_x1;
6711 break;
6712 case 1:
6713 op = Q ? Iop_Reverse16sIn64_x2 : Iop_Reverse16sIn64_x1;
6714 break;
6715 case 2:
6716 op = Q ? Iop_Reverse32sIn64_x2 : Iop_Reverse32sIn64_x1;
6717 break;
6718 case 3:
6719 return False;
6720 default:
6721 vassert(0);
6723 assign(res, unop(op, mkexpr(arg_m)));
6724 DIP("vrev64.%d %c%u, %c%u\n", 8 << size,
6725 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6726 break;
6728 case 1: {
6729 /* VREV32 */
6730 IROp op;
6731 switch (size) {
6732 case 0:
6733 op = Q ? Iop_Reverse8sIn32_x4 : Iop_Reverse8sIn32_x2;
6734 break;
6735 case 1:
6736 op = Q ? Iop_Reverse16sIn32_x4 : Iop_Reverse16sIn32_x2;
6737 break;
6738 case 2:
6739 case 3:
6740 return False;
6741 default:
6742 vassert(0);
6744 assign(res, unop(op, mkexpr(arg_m)));
6745 DIP("vrev32.%d %c%u, %c%u\n", 8 << size,
6746 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6747 break;
6749 case 2: {
6750 /* VREV16 */
6751 IROp op;
6752 switch (size) {
6753 case 0:
6754 op = Q ? Iop_Reverse8sIn16_x8 : Iop_Reverse8sIn16_x4;
6755 break;
6756 case 1:
6757 case 2:
6758 case 3:
6759 return False;
6760 default:
6761 vassert(0);
6763 assign(res, unop(op, mkexpr(arg_m)));
6764 DIP("vrev16.%d %c%u, %c%u\n", 8 << size,
6765 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6766 break;
6768 case 3:
6769 return False;
6770 case 4:
6771 case 5: {
6772 /* VPADDL */
6773 IROp op;
6774 U = (theInstr >> 7) & 1;
6775 if (Q) {
6776 switch (size) {
6777 case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
6778 case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
6779 case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
6780 case 3: return False;
6781 default: vassert(0);
6783 } else {
6784 switch (size) {
6785 case 0: op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8; break;
6786 case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
6787 case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
6788 case 3: return False;
6789 default: vassert(0);
6792 assign(res, unop(op, mkexpr(arg_m)));
6793 DIP("vpaddl.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6794 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6795 break;
6797 case 6:
6798 case 7:
6799 return False;
6800 case 8: {
6801 /* VCLS */
6802 IROp op;
6803 switch (size) {
6804 case 0: op = Q ? Iop_Cls8x16 : Iop_Cls8x8; break;
6805 case 1: op = Q ? Iop_Cls16x8 : Iop_Cls16x4; break;
6806 case 2: op = Q ? Iop_Cls32x4 : Iop_Cls32x2; break;
6807 case 3: return False;
6808 default: vassert(0);
6810 assign(res, unop(op, mkexpr(arg_m)));
6811 DIP("vcls.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6812 Q ? 'q' : 'd', mreg);
6813 break;
6815 case 9: {
6816 /* VCLZ */
6817 IROp op;
6818 switch (size) {
6819 case 0: op = Q ? Iop_Clz8x16 : Iop_Clz8x8; break;
6820 case 1: op = Q ? Iop_Clz16x8 : Iop_Clz16x4; break;
6821 case 2: op = Q ? Iop_Clz32x4 : Iop_Clz32x2; break;
6822 case 3: return False;
6823 default: vassert(0);
6825 assign(res, unop(op, mkexpr(arg_m)));
6826 DIP("vclz.i%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6827 Q ? 'q' : 'd', mreg);
6828 break;
6830 case 10:
6831 /* VCNT */
6832 assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
6833 DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6834 mreg);
6835 break;
6836 case 11:
6837 /* VMVN */
6838 if (Q)
6839 assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
6840 else
6841 assign(res, unop(Iop_Not64, mkexpr(arg_m)));
6842 DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6843 mreg);
6844 break;
6845 case 12:
6846 case 13: {
6847 /* VPADAL */
6848 IROp op, add_op;
6849 U = (theInstr >> 7) & 1;
6850 if (Q) {
6851 switch (size) {
6852 case 0:
6853 op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
6854 add_op = Iop_Add16x8;
6855 break;
6856 case 1:
6857 op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
6858 add_op = Iop_Add32x4;
6859 break;
6860 case 2:
6861 op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
6862 add_op = Iop_Add64x2;
6863 break;
6864 case 3:
6865 return False;
6866 default:
6867 vassert(0);
6869 } else {
6870 switch (size) {
6871 case 0:
6872 op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
6873 add_op = Iop_Add16x4;
6874 break;
6875 case 1:
6876 op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
6877 add_op = Iop_Add32x2;
6878 break;
6879 case 2:
6880 op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
6881 add_op = Iop_Add64;
6882 break;
6883 case 3:
6884 return False;
6885 default:
6886 vassert(0);
6889 if (Q) {
6890 arg_d = newTemp(Ity_V128);
6891 assign(arg_d, getQReg(dreg));
6892 } else {
6893 arg_d = newTemp(Ity_I64);
6894 assign(arg_d, getDRegI64(dreg));
6896 assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
6897 mkexpr(arg_d)));
6898 DIP("vpadal.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6899 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6900 break;
6902 case 14: {
6903 /* VQABS */
6904 IROp op_sub, op_qsub, op_cmp;
6905 IRTemp mask, tmp;
6906 IRExpr *zero1, *zero2;
6907 IRExpr *neg, *neg2;
6908 if (Q) {
6909 zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6910 zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6911 mask = newTemp(Ity_V128);
6912 tmp = newTemp(Ity_V128);
6913 } else {
6914 zero1 = mkU64(0);
6915 zero2 = mkU64(0);
6916 mask = newTemp(Ity_I64);
6917 tmp = newTemp(Ity_I64);
6919 switch (size) {
6920 case 0:
6921 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6922 op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6923 op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
6924 break;
6925 case 1:
6926 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6927 op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6928 op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
6929 break;
6930 case 2:
6931 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6932 op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6933 op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
6934 break;
6935 case 3:
6936 return False;
6937 default:
6938 vassert(0);
6940 assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
6941 neg = binop(op_qsub, zero2, mkexpr(arg_m));
6942 neg2 = binop(op_sub, zero2, mkexpr(arg_m));
6943 assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
6944 binop(Q ? Iop_AndV128 : Iop_And64,
6945 mkexpr(mask),
6946 mkexpr(arg_m)),
6947 binop(Q ? Iop_AndV128 : Iop_And64,
6948 unop(Q ? Iop_NotV128 : Iop_Not64,
6949 mkexpr(mask)),
6950 neg)));
6951 assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
6952 binop(Q ? Iop_AndV128 : Iop_And64,
6953 mkexpr(mask),
6954 mkexpr(arg_m)),
6955 binop(Q ? Iop_AndV128 : Iop_And64,
6956 unop(Q ? Iop_NotV128 : Iop_Not64,
6957 mkexpr(mask)),
6958 neg2)));
6959 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
6960 DIP("vqabs.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6961 Q ? 'q' : 'd', mreg);
6962 break;
6964 case 15: {
6965 /* VQNEG */
6966 IROp op, op2;
6967 IRExpr *zero;
6968 if (Q) {
6969 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6970 } else {
6971 zero = mkU64(0);
6973 switch (size) {
6974 case 0:
6975 op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6976 op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6977 break;
6978 case 1:
6979 op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6980 op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6981 break;
6982 case 2:
6983 op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6984 op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6985 break;
6986 case 3:
6987 return False;
6988 default:
6989 vassert(0);
6991 assign(res, binop(op, zero, mkexpr(arg_m)));
6992 setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
6993 Q, condT);
6994 DIP("vqneg.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6995 Q ? 'q' : 'd', mreg);
6996 break;
6998 default:
6999 vassert(0);
7001 if (Q) {
7002 putQReg(dreg, mkexpr(res), condT);
7003 } else {
7004 putDRegI64(dreg, mkexpr(res), condT);
7006 return True;
7007 case 1:
7008 if (Q) {
7009 arg_m = newTemp(Ity_V128);
7010 res = newTemp(Ity_V128);
7011 assign(arg_m, getQReg(mreg));
7012 } else {
7013 arg_m = newTemp(Ity_I64);
7014 res = newTemp(Ity_I64);
7015 assign(arg_m, getDRegI64(mreg));
7017 switch ((B >> 1) & 0x7) {
7018 case 0: {
7019 /* VCGT #0 */
7020 IRExpr *zero;
7021 IROp op;
7022 if (Q) {
7023 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7024 } else {
7025 zero = mkU64(0);
7027 if (F) {
7028 switch (size) {
7029 case 0: case 1: case 3: return False;
7030 case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7031 default: vassert(0);
7033 } else {
7034 switch (size) {
7035 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7036 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7037 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7038 case 3: return False;
7039 default: vassert(0);
7042 assign(res, binop(op, mkexpr(arg_m), zero));
7043 DIP("vcgt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7044 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7045 break;
7047 case 1: {
7048 /* VCGE #0 */
7049 IROp op;
7050 IRExpr *zero;
7051 if (Q) {
7052 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7053 } else {
7054 zero = mkU64(0);
7056 if (F) {
7057 switch (size) {
7058 case 0: case 1: case 3: return False;
7059 case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7060 default: vassert(0);
7062 assign(res, binop(op, mkexpr(arg_m), zero));
7063 } else {
7064 switch (size) {
7065 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7066 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7067 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7068 case 3: return False;
7069 default: vassert(0);
7071 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7072 binop(op, zero, mkexpr(arg_m))));
7074 DIP("vcge.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7075 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7076 break;
7078 case 2: {
7079 /* VCEQ #0 */
7080 IROp op;
7081 IRExpr *zero;
7082 if (F) {
7083 if (Q) {
7084 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7085 } else {
7086 zero = mkU64(0);
7088 switch (size) {
7089 case 0: case 1: case 3: return False;
7090 case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
7091 default: vassert(0);
7093 assign(res, binop(op, zero, mkexpr(arg_m)));
7094 } else {
7095 switch (size) {
7096 case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
7097 case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
7098 case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
7099 case 3: return False;
7100 default: vassert(0);
7102 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7103 unop(op, mkexpr(arg_m))));
7105 DIP("vceq.%c%d %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
7106 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7107 break;
7109 case 3: {
7110 /* VCLE #0 */
7111 IRExpr *zero;
7112 IROp op;
7113 if (Q) {
7114 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7115 } else {
7116 zero = mkU64(0);
7118 if (F) {
7119 switch (size) {
7120 case 0: case 1: case 3: return False;
7121 case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7122 default: vassert(0);
7124 assign(res, binop(op, zero, mkexpr(arg_m)));
7125 } else {
7126 switch (size) {
7127 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7128 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7129 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7130 case 3: return False;
7131 default: vassert(0);
7133 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7134 binop(op, mkexpr(arg_m), zero)));
7136 DIP("vcle.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7137 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7138 break;
7140 case 4: {
7141 /* VCLT #0 */
7142 IROp op;
7143 IRExpr *zero;
7144 if (Q) {
7145 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7146 } else {
7147 zero = mkU64(0);
7149 if (F) {
7150 switch (size) {
7151 case 0: case 1: case 3: return False;
7152 case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7153 default: vassert(0);
7155 assign(res, binop(op, zero, mkexpr(arg_m)));
7156 } else {
7157 switch (size) {
7158 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7159 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7160 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7161 case 3: return False;
7162 default: vassert(0);
7164 assign(res, binop(op, zero, mkexpr(arg_m)));
7166 DIP("vclt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7167 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7168 break;
7170 case 5:
7171 return False;
7172 case 6: {
7173 /* VABS */
7174 if (!F) {
7175 IROp op;
7176 switch(size) {
7177 case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
7178 case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
7179 case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
7180 case 3: return False;
7181 default: vassert(0);
7183 assign(res, unop(op, mkexpr(arg_m)));
7184 } else {
7185 assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
7186 mkexpr(arg_m)));
7188 DIP("vabs.%c%d %c%u, %c%u\n",
7189 F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7190 Q ? 'q' : 'd', mreg);
7191 break;
7193 case 7: {
7194 /* VNEG */
7195 IROp op;
7196 IRExpr *zero;
7197 if (F) {
7198 switch (size) {
7199 case 0: case 1: case 3: return False;
7200 case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
7201 default: vassert(0);
7203 assign(res, unop(op, mkexpr(arg_m)));
7204 } else {
7205 if (Q) {
7206 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7207 } else {
7208 zero = mkU64(0);
7210 switch (size) {
7211 case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
7212 case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
7213 case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
7214 case 3: return False;
7215 default: vassert(0);
7217 assign(res, binop(op, zero, mkexpr(arg_m)));
7219 DIP("vneg.%c%d %c%u, %c%u\n",
7220 F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7221 Q ? 'q' : 'd', mreg);
7222 break;
7224 default:
7225 vassert(0);
7227 if (Q) {
7228 putQReg(dreg, mkexpr(res), condT);
7229 } else {
7230 putDRegI64(dreg, mkexpr(res), condT);
7232 return True;
7233 case 2:
7234 if ((B >> 1) == 0) {
7235 /* VSWP */
7236 if (Q) {
7237 arg_m = newTemp(Ity_V128);
7238 assign(arg_m, getQReg(mreg));
7239 putQReg(mreg, getQReg(dreg), condT);
7240 putQReg(dreg, mkexpr(arg_m), condT);
7241 } else {
7242 arg_m = newTemp(Ity_I64);
7243 assign(arg_m, getDRegI64(mreg));
7244 putDRegI64(mreg, getDRegI64(dreg), condT);
7245 putDRegI64(dreg, mkexpr(arg_m), condT);
7247 DIP("vswp %c%u, %c%u\n",
7248 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7249 return True;
7250 } else if ((B >> 1) == 1) {
7251 /* VTRN */
7252 IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
7253 IRTemp old_m, old_d, new_d, new_m;
7254 if (Q) {
7255 old_m = newTemp(Ity_V128);
7256 old_d = newTemp(Ity_V128);
7257 new_m = newTemp(Ity_V128);
7258 new_d = newTemp(Ity_V128);
7259 assign(old_m, getQReg(mreg));
7260 assign(old_d, getQReg(dreg));
7261 } else {
7262 old_m = newTemp(Ity_I64);
7263 old_d = newTemp(Ity_I64);
7264 new_m = newTemp(Ity_I64);
7265 new_d = newTemp(Ity_I64);
7266 assign(old_m, getDRegI64(mreg));
7267 assign(old_d, getDRegI64(dreg));
7269 if (Q) {
7270 switch (size) {
7271 case 0:
7272 op_odd = Iop_InterleaveOddLanes8x16;
7273 op_even = Iop_InterleaveEvenLanes8x16;
7274 break;
7275 case 1:
7276 op_odd = Iop_InterleaveOddLanes16x8;
7277 op_even = Iop_InterleaveEvenLanes16x8;
7278 break;
7279 case 2:
7280 op_odd = Iop_InterleaveOddLanes32x4;
7281 op_even = Iop_InterleaveEvenLanes32x4;
7282 break;
7283 case 3:
7284 return False;
7285 default:
7286 vassert(0);
7288 } else {
7289 switch (size) {
7290 case 0:
7291 op_odd = Iop_InterleaveOddLanes8x8;
7292 op_even = Iop_InterleaveEvenLanes8x8;
7293 break;
7294 case 1:
7295 op_odd = Iop_InterleaveOddLanes16x4;
7296 op_even = Iop_InterleaveEvenLanes16x4;
7297 break;
7298 case 2:
7299 op_odd = Iop_InterleaveHI32x2;
7300 op_even = Iop_InterleaveLO32x2;
7301 break;
7302 case 3:
7303 return False;
7304 default:
7305 vassert(0);
7308 assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7309 assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7310 if (Q) {
7311 putQReg(dreg, mkexpr(new_d), condT);
7312 putQReg(mreg, mkexpr(new_m), condT);
7313 } else {
7314 putDRegI64(dreg, mkexpr(new_d), condT);
7315 putDRegI64(mreg, mkexpr(new_m), condT);
7317 DIP("vtrn.%d %c%u, %c%u\n",
7318 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7319 return True;
7320 } else if ((B >> 1) == 2) {
7321 /* VUZP */
7322 IROp op_even, op_odd;
7323 IRTemp old_m, old_d, new_m, new_d;
7324 if (!Q && size == 2)
7325 return False;
7326 if (Q) {
7327 old_m = newTemp(Ity_V128);
7328 old_d = newTemp(Ity_V128);
7329 new_m = newTemp(Ity_V128);
7330 new_d = newTemp(Ity_V128);
7331 assign(old_m, getQReg(mreg));
7332 assign(old_d, getQReg(dreg));
7333 } else {
7334 old_m = newTemp(Ity_I64);
7335 old_d = newTemp(Ity_I64);
7336 new_m = newTemp(Ity_I64);
7337 new_d = newTemp(Ity_I64);
7338 assign(old_m, getDRegI64(mreg));
7339 assign(old_d, getDRegI64(dreg));
7341 switch (size) {
7342 case 0:
7343 op_odd = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
7344 op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
7345 break;
7346 case 1:
7347 op_odd = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
7348 op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
7349 break;
7350 case 2:
7351 op_odd = Iop_CatOddLanes32x4;
7352 op_even = Iop_CatEvenLanes32x4;
7353 break;
7354 case 3:
7355 return False;
7356 default:
7357 vassert(0);
7359 assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7360 assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7361 if (Q) {
7362 putQReg(dreg, mkexpr(new_d), condT);
7363 putQReg(mreg, mkexpr(new_m), condT);
7364 } else {
7365 putDRegI64(dreg, mkexpr(new_d), condT);
7366 putDRegI64(mreg, mkexpr(new_m), condT);
7368 DIP("vuzp.%d %c%u, %c%u\n",
7369 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7370 return True;
7371 } else if ((B >> 1) == 3) {
7372 /* VZIP */
7373 IROp op_lo, op_hi;
7374 IRTemp old_m, old_d, new_m, new_d;
7375 if (!Q && size == 2)
7376 return False;
7377 if (Q) {
7378 old_m = newTemp(Ity_V128);
7379 old_d = newTemp(Ity_V128);
7380 new_m = newTemp(Ity_V128);
7381 new_d = newTemp(Ity_V128);
7382 assign(old_m, getQReg(mreg));
7383 assign(old_d, getQReg(dreg));
7384 } else {
7385 old_m = newTemp(Ity_I64);
7386 old_d = newTemp(Ity_I64);
7387 new_m = newTemp(Ity_I64);
7388 new_d = newTemp(Ity_I64);
7389 assign(old_m, getDRegI64(mreg));
7390 assign(old_d, getDRegI64(dreg));
7392 switch (size) {
7393 case 0:
7394 op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
7395 op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
7396 break;
7397 case 1:
7398 op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
7399 op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
7400 break;
7401 case 2:
7402 op_hi = Iop_InterleaveHI32x4;
7403 op_lo = Iop_InterleaveLO32x4;
7404 break;
7405 case 3:
7406 return False;
7407 default:
7408 vassert(0);
7410 assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
7411 assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
7412 if (Q) {
7413 putQReg(dreg, mkexpr(new_d), condT);
7414 putQReg(mreg, mkexpr(new_m), condT);
7415 } else {
7416 putDRegI64(dreg, mkexpr(new_d), condT);
7417 putDRegI64(mreg, mkexpr(new_m), condT);
7419 DIP("vzip.%d %c%u, %c%u\n",
7420 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7421 return True;
7422 } else if (B == 8) {
7423 /* VMOVN */
7424 IROp op;
7425 mreg >>= 1;
7426 switch (size) {
7427 case 0: op = Iop_NarrowUn16to8x8; break;
7428 case 1: op = Iop_NarrowUn32to16x4; break;
7429 case 2: op = Iop_NarrowUn64to32x2; break;
7430 case 3: return False;
7431 default: vassert(0);
7433 putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
7434 DIP("vmovn.i%d d%u, q%u\n", 16 << size, dreg, mreg);
7435 return True;
7436 } else if (B == 9 || (B >> 1) == 5) {
7437 /* VQMOVN, VQMOVUN */
7438 IROp op, op2;
7439 IRTemp tmp;
7440 dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
7441 mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
7442 if (mreg & 1)
7443 return False;
7444 mreg >>= 1;
7445 switch (size) {
7446 case 0: op2 = Iop_NarrowUn16to8x8; break;
7447 case 1: op2 = Iop_NarrowUn32to16x4; break;
7448 case 2: op2 = Iop_NarrowUn64to32x2; break;
7449 case 3: return False;
7450 default: vassert(0);
7452 switch (B & 3) {
7453 case 0:
7454 vassert(0);
7455 case 1:
7456 switch (size) {
7457 case 0: op = Iop_QNarrowUn16Sto8Ux8; break;
7458 case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
7459 case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
7460 case 3: return False;
7461 default: vassert(0);
7463 DIP("vqmovun.s%d d%u, q%u\n", 16 << size, dreg, mreg);
7464 break;
7465 case 2:
7466 switch (size) {
7467 case 0: op = Iop_QNarrowUn16Sto8Sx8; break;
7468 case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
7469 case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
7470 case 3: return False;
7471 default: vassert(0);
7473 DIP("vqmovn.s%d d%u, q%u\n", 16 << size, dreg, mreg);
7474 break;
7475 case 3:
7476 switch (size) {
7477 case 0: op = Iop_QNarrowUn16Uto8Ux8; break;
7478 case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
7479 case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
7480 case 3: return False;
7481 default: vassert(0);
7483 DIP("vqmovn.u%d d%u, q%u\n", 16 << size, dreg, mreg);
7484 break;
7485 default:
7486 vassert(0);
7488 res = newTemp(Ity_I64);
7489 tmp = newTemp(Ity_I64);
7490 assign(res, unop(op, getQReg(mreg)));
7491 assign(tmp, unop(op2, getQReg(mreg)));
7492 setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
7493 putDRegI64(dreg, mkexpr(res), condT);
7494 return True;
7495 } else if (B == 12) {
7496 /* VSHLL (maximum shift) */
7497 IROp op, cvt;
7498 UInt shift_imm;
7499 if (Q)
7500 return False;
7501 if (dreg & 1)
7502 return False;
7503 dreg >>= 1;
7504 shift_imm = 8 << size;
7505 res = newTemp(Ity_V128);
7506 switch (size) {
7507 case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8; break;
7508 case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
7509 case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
7510 case 3: return False;
7511 default: vassert(0);
7513 assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
7514 mkU8(shift_imm)));
7515 putQReg(dreg, mkexpr(res), condT);
7516 DIP("vshll.i%d q%u, d%u, #%d\n", 8 << size, dreg, mreg, 8 << size);
7517 return True;
7518 } else if ((B >> 3) == 3 && (B & 3) == 0) {
7519 /* VCVT (half<->single) */
7520 /* Half-precision extensions are needed to run this */
7521 vassert(0); // ATC
7522 if (((theInstr >> 18) & 3) != 1)
7523 return False;
7524 if ((theInstr >> 8) & 1) {
7525 if (dreg & 1)
7526 return False;
7527 dreg >>= 1;
7528 putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
7529 condT);
7530 DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
7531 } else {
7532 if (mreg & 1)
7533 return False;
7534 mreg >>= 1;
7535 putDRegI64(dreg, unop(Iop_F32toF16x4_DEP, getQReg(mreg)),
7536 condT);
7537 DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
7539 return True;
7540 } else {
7541 return False;
7543 vassert(0);
7544 return True;
7545 case 3:
7546 if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
7547 /* VRECPE */
7548 IROp op;
7549 F = (theInstr >> 8) & 1;
7550 if (size != 2)
7551 return False;
7552 if (Q) {
7553 op = F ? Iop_RecipEst32Fx4 : Iop_RecipEst32Ux4;
7554 putQReg(dreg, unop(op, getQReg(mreg)), condT);
7555 DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7556 } else {
7557 op = F ? Iop_RecipEst32Fx2 : Iop_RecipEst32Ux2;
7558 putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7559 DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7561 return True;
7562 } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
7563 /* VRSQRTE */
7564 IROp op;
7565 F = (B >> 2) & 1;
7566 if (size != 2)
7567 return False;
7568 if (F) {
7569 /* fp */
7570 op = Q ? Iop_RSqrtEst32Fx4 : Iop_RSqrtEst32Fx2;
7571 } else {
7572 /* unsigned int */
7573 op = Q ? Iop_RSqrtEst32Ux4 : Iop_RSqrtEst32Ux2;
7575 if (Q) {
7576 putQReg(dreg, unop(op, getQReg(mreg)), condT);
7577 DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7578 } else {
7579 putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7580 DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7582 return True;
7583 } else if ((B >> 3) == 3) {
7584 /* VCVT (fp<->integer) */
7585 IROp op;
7586 if (size != 2)
7587 return False;
7588 switch ((B >> 1) & 3) {
7589 case 0:
7590 op = Q ? Iop_I32StoF32x4_DEP : Iop_I32StoF32x2_DEP;
7591 DIP("vcvt.f32.s32 %c%u, %c%u\n",
7592 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7593 break;
7594 case 1:
7595 op = Q ? Iop_I32UtoF32x4_DEP : Iop_I32UtoF32x2_DEP;
7596 DIP("vcvt.f32.u32 %c%u, %c%u\n",
7597 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7598 break;
7599 case 2:
7600 op = Q ? Iop_F32toI32Sx4_RZ : Iop_F32toI32Sx2_RZ;
7601 DIP("vcvt.s32.f32 %c%u, %c%u\n",
7602 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7603 break;
7604 case 3:
7605 op = Q ? Iop_F32toI32Ux4_RZ : Iop_F32toI32Ux2_RZ;
7606 DIP("vcvt.u32.f32 %c%u, %c%u\n",
7607 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7608 break;
7609 default:
7610 vassert(0);
7612 if (Q) {
7613 putQReg(dreg, unop(op, getQReg(mreg)), condT);
7614 } else {
7615 putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7617 return True;
7618 } else {
7619 return False;
7621 vassert(0);
7622 return True;
7623 default:
7624 vassert(0);
7626 return False;
7629 /* A7.4.6 One register and a modified immediate value */
7630 static
7631 void ppNeonImm(UInt imm, UInt cmode, UInt op)
7633 int i;
7634 switch (cmode) {
7635 case 0: case 1: case 8: case 9:
7636 vex_printf("0x%x", imm);
7637 break;
7638 case 2: case 3: case 10: case 11:
7639 vex_printf("0x%x00", imm);
7640 break;
7641 case 4: case 5:
7642 vex_printf("0x%x0000", imm);
7643 break;
7644 case 6: case 7:
7645 vex_printf("0x%x000000", imm);
7646 break;
7647 case 12:
7648 vex_printf("0x%xff", imm);
7649 break;
7650 case 13:
7651 vex_printf("0x%xffff", imm);
7652 break;
7653 case 14:
7654 if (op) {
7655 vex_printf("0x");
7656 for (i = 7; i >= 0; i--)
7657 vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
7658 } else {
7659 vex_printf("0x%x", imm);
7661 break;
7662 case 15:
7663 vex_printf("0x%x", imm);
7664 break;
7668 static
7669 const char *ppNeonImmType(UInt cmode, UInt op)
7671 switch (cmode) {
7672 case 0 ... 7:
7673 case 12: case 13:
7674 return "i32";
7675 case 8 ... 11:
7676 return "i16";
7677 case 14:
7678 if (op)
7679 return "i64";
7680 else
7681 return "i8";
7682 case 15:
7683 if (op)
7684 vassert(0);
7685 else
7686 return "f32";
7687 default:
7688 vassert(0);
7692 static
7693 void DIPimm(UInt imm, UInt cmode, UInt op,
7694 const char *instr, UInt Q, UInt dreg)
7696 if (vex_traceflags & VEX_TRACE_FE) {
7697 vex_printf("%s.%s %c%u, #", instr,
7698 ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
7699 ppNeonImm(imm, cmode, op);
7700 vex_printf("\n");
7704 static
/* Decode the "one register and a modified immediate value" NEON forms
   (VMOV, VMVN, VORR, VBIC with an immediate operand).  Returns True iff
   the instruction was decoded; condT guards the register writes (for
   Thumb conditional execution). */
7705 Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
7707 UInt dreg = get_neon_d_regno(theInstr);
/* Gather the scattered 8-bit immediate a:b:c:d:e:f:g:h from instruction
   bits 24, 18..16 and 3..0. */
7708 ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
7709 (theInstr & 0xf);
7710 ULong imm_raw_pp = imm_raw; /* unshifted copy, kept only for tracing */
7711 UInt cmode = (theInstr >> 8) & 0xf;
7712 UInt op_bit = (theInstr >> 5) & 1;
7713 ULong imm = 0;
7714 UInt Q = (theInstr >> 6) & 1;
7715 int i, j;
7716 UInt tmp;
7717 IRExpr *imm_val;
7718 IRExpr *expr;
7719 IRTemp tmp_var;
/* Expand imm_raw into the full 64-bit immediate.  The fall-through
   chains below accumulate one byte of left-shift per level, so e.g.
   cmode 6/7 ends up shifted by 24 bits. */
7720 switch(cmode) {
7721 case 7: case 6:
7722 imm_raw = imm_raw << 8;
7723 /* fallthrough */
7724 case 5: case 4:
7725 imm_raw = imm_raw << 8;
7726 /* fallthrough */
7727 case 3: case 2:
7728 imm_raw = imm_raw << 8;
7729 /* fallthrough */
7730 case 0: case 1:
/* 32-bit immediate, replicated into both halves of the 64-bit value */
7731 imm = (imm_raw << 32) | imm_raw;
7732 break;
7733 case 11: case 10:
7734 imm_raw = imm_raw << 8;
7735 /* fallthrough */
7736 case 9: case 8:
/* 16-bit immediate, replicated four times */
7737 imm_raw = (imm_raw << 16) | imm_raw;
7738 imm = (imm_raw << 32) | imm_raw;
7739 break;
7740 case 13:
7741 imm_raw = (imm_raw << 8) | 0xff;
7742 /* fallthrough */
7743 case 12:
/* ones-extended forms: imm:0xff (cmode 12) or imm:0xffff (cmode 13),
   replicated twice */
7744 imm_raw = (imm_raw << 8) | 0xff;
7745 imm = (imm_raw << 32) | imm_raw;
7746 break;
7747 case 14:
7748 if (! op_bit) {
/* i8: replicate the byte into all 8 byte lanes */
7749 for(i = 0; i < 8; i++) {
7750 imm = (imm << 8) | imm_raw;
7752 } else {
/* i64: immediate bit i selects an all-ones or all-zeroes byte i */
7753 for(i = 7; i >= 0; i--) {
7754 tmp = 0;
7755 for(j = 0; j < 8; j++) {
7756 tmp = (tmp << 1) | ((imm_raw >> i) & 1);
7758 imm = (imm << 8) | tmp;
7761 break;
7762 case 15:
/* f32: build a:NOT(b):b:b:b:b:b:cdefgh:Zeros(19) — the VFP
   single-precision immediate expansion — replicated twice */
7763 imm = (imm_raw & 0x80) << 5;
7764 imm |= ((~imm_raw & 0x40) << 5);
7765 for(i = 1; i <= 4; i++)
7766 imm |= (imm_raw & 0x40) << i;
7767 imm |= (imm_raw & 0x7f);
7768 imm = imm << 19;
7769 imm = (imm << 32) | imm;
7770 break;
7771 default:
7772 return False;
/* Materialise the expanded immediate as a 64- or 128-bit IR constant. */
7774 if (Q) {
7775 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
7776 } else {
7777 imm_val = mkU64(imm);
7779 if (((op_bit == 0) &&
7780 (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
7781 ((op_bit == 1) && (cmode == 14))) {
7782 /* VMOV (immediate) */
7783 if (Q) {
7784 putQReg(dreg, imm_val, condT);
7785 } else {
7786 putDRegI64(dreg, imm_val, condT);
7788 DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
7789 return True;
7791 if ((op_bit == 1) &&
7792 (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
7793 /* VMVN (immediate) */
7794 if (Q) {
7795 putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
7796 } else {
7797 putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
7799 DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
7800 return True;
/* The remaining forms (VORR/VBIC) read-modify-write Dd/Qd, so fetch
   the destination's old value first. */
7802 if (Q) {
7803 tmp_var = newTemp(Ity_V128);
7804 assign(tmp_var, getQReg(dreg));
7805 } else {
7806 tmp_var = newTemp(Ity_I64);
7807 assign(tmp_var, getDRegI64(dreg));
7809 if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7810 /* VORR (immediate) */
7811 if (Q)
7812 expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
7813 else
7814 expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
7815 DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
7816 } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7817 /* VBIC (immediate) */
7818 if (Q)
7819 expr = binop(Iop_AndV128, mkexpr(tmp_var),
7820 unop(Iop_NotV128, imm_val));
7821 else
7822 expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
7823 DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
7824 } else {
7825 return False;
7827 if (Q)
7828 putQReg(dreg, expr, condT);
7829 else
7830 putDRegI64(dreg, expr, condT);
7831 return True;
7834 /* A7.4 Advanced SIMD data-processing instructions */
7835 static
7836 Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
7838 UInt A = (theInstr >> 19) & 0x1F;
7839 UInt B = (theInstr >> 8) & 0xF;
7840 UInt C = (theInstr >> 4) & 0xF;
7841 UInt U = (theInstr >> 24) & 0x1;
7843 if (! (A & 0x10)) {
7844 return dis_neon_data_3same(theInstr, condT);
7846 if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
7847 return dis_neon_data_1reg_and_imm(theInstr, condT);
7849 if ((C & 1) == 1) {
7850 return dis_neon_data_2reg_and_shift(theInstr, condT);
7852 if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7853 return dis_neon_data_3diff(theInstr, condT);
7855 if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7856 return dis_neon_data_2reg_and_scalar(theInstr, condT);
7858 if ((A & 0x16) == 0x16) {
7859 if ((U == 0) && ((C & 1) == 0)) {
7860 return dis_neon_vext(theInstr, condT);
7862 if ((U != 1) || ((C & 1) == 1))
7863 return False;
7864 if ((B & 8) == 0) {
7865 return dis_neon_data_2reg_misc(theInstr, condT);
7867 if ((B & 12) == 8) {
7868 return dis_neon_vtb(theInstr, condT);
7870 if ((B == 12) && ((C & 9) == 0)) {
7871 return dis_neon_vdup(theInstr, condT);
7874 return False;
7878 /*------------------------------------------------------------*/
7879 /*--- NEON loads and stores ---*/
7880 /*------------------------------------------------------------*/
7882 /* For NEON memory operations, we use the standard scheme to handle
7883 conditionalisation: generate a jump around the instruction if the
7884 condition is false. That's only necessary in Thumb mode, however,
7885 since in ARM mode NEON instructions are unconditional. */
7887 /* A helper function for what follows. It assumes we already went
7888 uncond as per comments at the top of this section. */
7889 static
7890 void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
7891 UInt N, UInt size, IRTemp addr )
7893 UInt i;
7894 switch (size) {
7895 case 0:
7896 putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
7897 loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
7898 break;
7899 case 1:
7900 putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
7901 loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
7902 break;
7903 case 2:
7904 putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
7905 loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
7906 break;
7907 default:
7908 vassert(0);
7910 for (i = 1; i <= N; i++) {
7911 switch (size) {
7912 case 0:
7913 putDRegI64(rD + i * inc,
7914 triop(Iop_SetElem8x8,
7915 getDRegI64(rD + i * inc),
7916 mkU8(index),
7917 loadLE(Ity_I8, binop(Iop_Add32,
7918 mkexpr(addr),
7919 mkU32(i * 1)))),
7920 IRTemp_INVALID);
7921 break;
7922 case 1:
7923 putDRegI64(rD + i * inc,
7924 triop(Iop_SetElem16x4,
7925 getDRegI64(rD + i * inc),
7926 mkU8(index),
7927 loadLE(Ity_I16, binop(Iop_Add32,
7928 mkexpr(addr),
7929 mkU32(i * 2)))),
7930 IRTemp_INVALID);
7931 break;
7932 case 2:
7933 putDRegI64(rD + i * inc,
7934 triop(Iop_SetElem32x2,
7935 getDRegI64(rD + i * inc),
7936 mkU8(index),
7937 loadLE(Ity_I32, binop(Iop_Add32,
7938 mkexpr(addr),
7939 mkU32(i * 4)))),
7940 IRTemp_INVALID);
7941 break;
7942 default:
7943 vassert(0);
7948 /* A(nother) helper function for what follows. It assumes we already
7949 went uncond as per comments at the top of this section. */
7950 static
7951 void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
7952 UInt N, UInt size, IRTemp addr )
7954 UInt i;
7955 switch (size) {
7956 case 0:
7957 storeLE(mkexpr(addr),
7958 binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
7959 break;
7960 case 1:
7961 storeLE(mkexpr(addr),
7962 binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
7963 break;
7964 case 2:
7965 storeLE(mkexpr(addr),
7966 binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
7967 break;
7968 default:
7969 vassert(0);
7971 for (i = 1; i <= N; i++) {
7972 switch (size) {
7973 case 0:
7974 storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
7975 binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
7976 mkU8(index)));
7977 break;
7978 case 1:
7979 storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
7980 binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
7981 mkU8(index)));
7982 break;
7983 case 2:
7984 storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
7985 binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
7986 mkU8(index)));
7987 break;
7988 default:
7989 vassert(0);
7994 /* Generate 2x64 -> 2x64 deinterleave code, for VLD2. Caller must
7995 make *u0 and *u1 be valid IRTemps before the call. */
7996 static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
7997 IRTemp i0, IRTemp i1, Int laneszB)
7999 /* The following assumes that the guest is little endian, and hence
8000 that the memory-side (interleaved) data is stored
8001 little-endianly. */
8002 vassert(u0 && u1);
8003 /* This is pretty easy, since we have primitives directly to
8004 hand. */
8005 if (laneszB == 4) {
8006 // memLE(128 bits) == A0 B0 A1 B1
8007 // i0 == B0 A0, i1 == B1 A1
8008 // u0 == A1 A0, u1 == B1 B0
8009 assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
8010 assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
8011 } else if (laneszB == 2) {
8012 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
8013 // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
8014 // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
8015 assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
8016 assign(*u1, binop(Iop_CatOddLanes16x4, mkexpr(i1), mkexpr(i0)));
8017 } else if (laneszB == 1) {
8018 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
8019 // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
8020 // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
8021 assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
8022 assign(*u1, binop(Iop_CatOddLanes8x8, mkexpr(i1), mkexpr(i0)));
8023 } else {
8024 // Can never happen, since VLD2 only has valid lane widths of 32,
8025 // 16 or 8 bits.
8026 vpanic("math_DEINTERLEAVE_2");
8030 /* Generate 2x64 -> 2x64 interleave code, for VST2. Caller must make
8031 *u0 and *u1 be valid IRTemps before the call. */
8032 static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8033 IRTemp u0, IRTemp u1, Int laneszB)
8035 /* The following assumes that the guest is little endian, and hence
8036 that the memory-side (interleaved) data is stored
8037 little-endianly. */
8038 vassert(i0 && i1);
8039 /* This is pretty easy, since we have primitives directly to
8040 hand. */
8041 if (laneszB == 4) {
8042 // memLE(128 bits) == A0 B0 A1 B1
8043 // i0 == B0 A0, i1 == B1 A1
8044 // u0 == A1 A0, u1 == B1 B0
8045 assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
8046 assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
8047 } else if (laneszB == 2) {
8048 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
8049 // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
8050 // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
8051 assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
8052 assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
8053 } else if (laneszB == 1) {
8054 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
8055 // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
8056 // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
8057 assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
8058 assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
8059 } else {
8060 // Can never happen, since VST2 only has valid lane widths of 32,
8061 // 16 or 8 bits.
8062 vpanic("math_INTERLEAVE_2");
8066 // Helper function for generating arbitrary slicing 'n' dicing of
8067 // 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
8068 static IRExpr* math_PERM_8x8x3(const UChar* desc,
8069 IRTemp s0, IRTemp s1, IRTemp s2)
8071 // desc is an array of 8 pairs, encoded as 16 bytes,
8072 // that describe how to assemble the result lanes, starting with
8073 // lane 7. Each pair is: first component (0..2) says which of
8074 // s0/s1/s2 to use. Second component (0..7) is the lane number
8075 // in the source to use.
8076 UInt si;
8077 for (si = 0; si < 7; si++) {
8078 vassert(desc[2 * si + 0] <= 2);
8079 vassert(desc[2 * si + 1] <= 7);
8081 IRTemp h3 = newTemp(Ity_I64);
8082 IRTemp h2 = newTemp(Ity_I64);
8083 IRTemp h1 = newTemp(Ity_I64);
8084 IRTemp h0 = newTemp(Ity_I64);
8085 IRTemp srcs[3] = {s0, s1, s2};
8086 # define SRC_VEC(_lane) mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
8087 # define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
8088 assign(h3, binop(Iop_InterleaveHI8x8,
8089 binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
8090 binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
8091 assign(h2, binop(Iop_InterleaveHI8x8,
8092 binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
8093 binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
8094 assign(h1, binop(Iop_InterleaveHI8x8,
8095 binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
8096 binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
8097 assign(h0, binop(Iop_InterleaveHI8x8,
8098 binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
8099 binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
8100 # undef SRC_VEC
8101 # undef SRC_SHIFT
8102 // Now h3..h0 are 64 bit vectors with useful information only
8103 // in the top 16 bits. We now concatentate those four 16-bit
8104 // groups so as to produce the final result.
8105 IRTemp w1 = newTemp(Ity_I64);
8106 IRTemp w0 = newTemp(Ity_I64);
8107 assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
8108 assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
8109 return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
8112 /* Generate 3x64 -> 3x64 deinterleave code, for VLD3. Caller must
8113 make *u0, *u1 and *u2 be valid IRTemps before the call. */
/* Splits three interleaved 64-bit memory-side vectors (i0..i2, as read
   by VLD3) into three per-element register-side vectors (*u0..*u2),
   for lane sizes of 4, 2 or 1 bytes.  Any other laneszB panics. */
8114 static void math_DEINTERLEAVE_3 (
8115 /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
8116 IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
/* Local shorthands for VEX IR interleave/shift primops; #undef'd at
   the end of the function. */
8119 # define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8120 # define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8121 # define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8122 /* The following assumes that the guest is little endian, and hence
8123 that the memory-side (interleaved) data is stored
8124 little-endianly. */
8125 vassert(u0 && u1 && u2);
8126 if (laneszB == 4) {
8127 // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8128 // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8129 // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8130 assign(*u0, IHI32x2(SHL64(i1, 0), SHL64(i0, 32)));
8131 assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0, 0)));
8132 assign(*u2, IHI32x2(SHL64(i2, 0), SHL64(i1, 32)));
8133 } else if (laneszB == 2) {
8134 // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8135 // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8136 // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
/* XXX(t3,l3, t2,l2, t1,l1, t0,l0) builds a 64-bit result whose 16-bit
   lanes (high to low) are lane l3 of t3, l2 of t2, l1 of t1, l0 of t0:
   each shift moves the chosen lane into bits 63:48, and the two
   interleave levels then concatenate the four top halves. */
8137 # define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8138 IHI32x2( \
8139 IHI16x4(SHL64((_tmp3),48-16*(_la3)), \
8140 SHL64((_tmp2),48-16*(_la2))), \
8141 IHI16x4(SHL64((_tmp1),48-16*(_la1)), \
8142 SHL64((_tmp0),48-16*(_la0))))
8143 assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
8144 assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
8145 assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
8146 # undef XXX
8147 } else if (laneszB == 1) {
8148 // These describe how the result vectors [7..0] are
8149 // assembled from the source vectors. Each pair is
8150 // (source vector number, lane number).
8151 static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
8152 static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
8153 static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
8154 assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
8155 assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
8156 assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
8157 } else {
8158 // Can never happen, since VLD3 only has valid lane widths of 32,
8159 // 16 or 8 bits.
8160 vpanic("math_DEINTERLEAVE_3");
8162 # undef SHL64
8163 # undef IHI16x4
8164 # undef IHI32x2
8167 /* Generate 3x64 -> 3x64 interleave code, for VST3. Caller must
8168 make *i0, *i1 and *i2 be valid IRTemps before the call. */
/* Exact inverse of math_DEINTERLEAVE_3: combines three per-element
   register-side vectors (u0..u2) into the three interleaved 64-bit
   memory-side vectors (*i0..*i2) that VST3 writes out, for lane sizes
   of 4, 2 or 1 bytes.  Any other laneszB panics. */
8169 static void math_INTERLEAVE_3 (
8170 /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
8171 IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
/* Local shorthands for VEX IR interleave/shift primops; #undef'd at
   the end of the function. */
8174 # define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8175 # define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8176 # define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8177 /* The following assumes that the guest is little endian, and hence
8178 that the memory-side (interleaved) data is stored
8179 little-endianly. */
8180 vassert(i0 && i1 && i2);
8181 if (laneszB == 4) {
8182 // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8183 // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8184 // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8185 assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
8186 assign(*i1, IHI32x2(SHL64(u0, 0), SHL64(u2, 32)));
8187 assign(*i2, IHI32x2(SHL64(u2, 0), SHL64(u1, 0)));
8188 } else if (laneszB == 2) {
8189 // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8190 // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8191 // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
/* XXX(t3,l3, t2,l2, t1,l1, t0,l0) builds a 64-bit result whose 16-bit
   lanes (high to low) are lane l3 of t3, l2 of t2, l1 of t1, l0 of t0.
   Same helper shape as in math_DEINTERLEAVE_3. */
8192 # define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8193 IHI32x2( \
8194 IHI16x4(SHL64((_tmp3),48-16*(_la3)), \
8195 SHL64((_tmp2),48-16*(_la2))), \
8196 IHI16x4(SHL64((_tmp1),48-16*(_la1)), \
8197 SHL64((_tmp0),48-16*(_la0))))
8198 assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
8199 assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
8200 assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
8201 # undef XXX
8202 } else if (laneszB == 1) {
8203 // These describe how the result vectors [7..0] are
8204 // assembled from the source vectors. Each pair is
8205 // (source vector number, lane number).
8206 static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
8207 static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
8208 static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
8209 assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
8210 assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
8211 assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
8212 } else {
8213 // Can never happen, since VST3 only has valid lane widths of 32,
8214 // 16 or 8 bits.
8215 vpanic("math_INTERLEAVE_3");
8217 # undef SHL64
8218 # undef IHI16x4
8219 # undef IHI32x2
8222 /* Generate 4x64 -> 4x64 deinterleave code, for VLD4. Caller must
8223 make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
/* Splits four interleaved 64-bit memory-side vectors (i0..i3, as read
   by VLD4) into four per-element register-side vectors (*u0..*u3),
   for lane sizes of 4, 2 or 1 bytes.  Any other laneszB panics.
   Strategy: the 32-bit case is a single interleave level; the 16-bit
   case first gathers even/odd 16-bit lanes and then reuses the 32-bit
   scheme; the 8-bit case first widens to 16-bit chunks and then
   reuses the 16-bit scheme. */
8224 static void math_DEINTERLEAVE_4 (
8225 /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
8226 /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
8227 IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
/* Local shorthands for VEX IR interleave/shift primops; #undef'd at
   the end of the function. */
8230 # define IHI32x2(_t1, _t2) \
8231 binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8232 # define ILO32x2(_t1, _t2) \
8233 binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8234 # define IHI16x4(_t1, _t2) \
8235 binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
8236 # define ILO16x4(_t1, _t2) \
8237 binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
8238 # define IHI8x8(_t1, _e2) \
8239 binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
8240 # define SHL64(_tmp, _amt) \
8241 binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8242 /* The following assumes that the guest is little endian, and hence
8243 that the memory-side (interleaved) data is stored
8244 little-endianly. */
8245 vassert(u0 && u1 && u2 && u3);
8246 if (laneszB == 4) {
8247 assign(*u0, ILO32x2(i2, i0));
8248 assign(*u1, IHI32x2(i2, i0));
8249 assign(*u2, ILO32x2(i3, i1));
8250 assign(*u3, IHI32x2(i3, i1));
8251 } else if (laneszB == 2) {
/* Gather the A/B 16-bit lanes into b*, the C/D lanes into d*. */
8252 IRTemp b1b0a1a0 = newTemp(Ity_I64);
8253 IRTemp b3b2a3a2 = newTemp(Ity_I64);
8254 IRTemp d1d0c1c0 = newTemp(Ity_I64);
8255 IRTemp d3d2c3c2 = newTemp(Ity_I64);
8256 assign(b1b0a1a0, ILO16x4(i1, i0));
8257 assign(b3b2a3a2, ILO16x4(i3, i2));
8258 assign(d1d0c1c0, IHI16x4(i1, i0));
8259 assign(d3d2c3c2, IHI16x4(i3, i2));
8260 // And now do what we did for the 32-bit case.
8261 assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8262 assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8263 assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8264 assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8265 } else if (laneszB == 1) {
8266 // Deinterleave into 16-bit chunks, then do as the 16-bit case.
8267 IRTemp i0x = newTemp(Ity_I64);
8268 IRTemp i1x = newTemp(Ity_I64);
8269 IRTemp i2x = newTemp(Ity_I64);
8270 IRTemp i3x = newTemp(Ity_I64);
/* Interleaving each i* with itself shifted left 32 pairs up bytes
   belonging to the same structure element. */
8271 assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
8272 assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
8273 assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
8274 assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
8275 // From here on is like the 16 bit case.
8276 IRTemp b1b0a1a0 = newTemp(Ity_I64);
8277 IRTemp b3b2a3a2 = newTemp(Ity_I64);
8278 IRTemp d1d0c1c0 = newTemp(Ity_I64);
8279 IRTemp d3d2c3c2 = newTemp(Ity_I64);
8280 assign(b1b0a1a0, ILO16x4(i1x, i0x));
8281 assign(b3b2a3a2, ILO16x4(i3x, i2x));
8282 assign(d1d0c1c0, IHI16x4(i1x, i0x));
8283 assign(d3d2c3c2, IHI16x4(i3x, i2x));
8284 // And now do what we did for the 32-bit case.
8285 assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8286 assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8287 assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8288 assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8289 } else {
8290 // Can never happen, since VLD4 only has valid lane widths of 32,
8291 // 16 or 8 bits.
8292 vpanic("math_DEINTERLEAVE_4");
8294 # undef SHL64
8295 # undef IHI8x8
8296 # undef ILO16x4
8297 # undef IHI16x4
8298 # undef ILO32x2
8299 # undef IHI32x2
8302 /* Generate 4x64 -> 4x64 interleave code, for VST4. Caller must
8303 make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
8304 static void math_INTERLEAVE_4 (
8305 /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8306 /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
8307 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
8310 # define IHI32x2(_t1, _t2) \
8311 binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8312 # define ILO32x2(_t1, _t2) \
8313 binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8314 # define CEV16x4(_t1, _t2) \
8315 binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
8316 # define COD16x4(_t1, _t2) \
8317 binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
8318 # define COD8x8(_t1, _e2) \
8319 binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
8320 # define SHL64(_tmp, _amt) \
8321 binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8322 /* The following assumes that the guest is little endian, and hence
8323 that the memory-side (interleaved) data is stored
8324 little-endianly. */
8325 vassert(u0 && u1 && u2 && u3);
8326 if (laneszB == 4) {
8327 assign(*i0, ILO32x2(u1, u0));
8328 assign(*i1, ILO32x2(u3, u2));
8329 assign(*i2, IHI32x2(u1, u0));
8330 assign(*i3, IHI32x2(u3, u2));
8331 } else if (laneszB == 2) {
8332 // First, interleave at the 32-bit lane size.
8333 IRTemp b1b0a1a0 = newTemp(Ity_I64);
8334 IRTemp b3b2a3a2 = newTemp(Ity_I64);
8335 IRTemp d1d0c1c0 = newTemp(Ity_I64);
8336 IRTemp d3d2c3c2 = newTemp(Ity_I64);
8337 assign(b1b0a1a0, ILO32x2(u1, u0));
8338 assign(b3b2a3a2, IHI32x2(u1, u0));
8339 assign(d1d0c1c0, ILO32x2(u3, u2));
8340 assign(d3d2c3c2, IHI32x2(u3, u2));
8341 // And interleave (cat) at the 16 bit size.
8342 assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
8343 assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
8344 assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
8345 assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
8346 } else if (laneszB == 1) {
8347 // First, interleave at the 32-bit lane size.
8348 IRTemp b1b0a1a0 = newTemp(Ity_I64);
8349 IRTemp b3b2a3a2 = newTemp(Ity_I64);
8350 IRTemp d1d0c1c0 = newTemp(Ity_I64);
8351 IRTemp d3d2c3c2 = newTemp(Ity_I64);
8352 assign(b1b0a1a0, ILO32x2(u1, u0));
8353 assign(b3b2a3a2, IHI32x2(u1, u0));
8354 assign(d1d0c1c0, ILO32x2(u3, u2));
8355 assign(d3d2c3c2, IHI32x2(u3, u2));
8356 // And interleave (cat) at the 16 bit size.
8357 IRTemp i0x = newTemp(Ity_I64);
8358 IRTemp i1x = newTemp(Ity_I64);
8359 IRTemp i2x = newTemp(Ity_I64);
8360 IRTemp i3x = newTemp(Ity_I64);
8361 assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
8362 assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
8363 assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
8364 assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
8365 // And rearrange within each word, to get the right 8 bit lanes.
8366 assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
8367 assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
8368 assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
8369 assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
8370 } else {
8371 // Can never happen, since VLD4 only has valid lane widths of 32,
8372 // 16 or 8 bits.
8373 vpanic("math_DEINTERLEAVE_4");
8375 # undef SHL64
8376 # undef COD8x8
8377 # undef COD16x4
8378 # undef CEV16x4
8379 # undef ILO32x2
8380 # undef IHI32x2
8383 /* A7.7 Advanced SIMD element or structure load/store instructions */
/* Decode and translate one ARMv7 Advanced SIMD element/structure
   load/store instruction (ARM ARM section A7.7: VLD1..VLD4 and
   VST1..VST4 in all their forms).  Returns True iff the instruction
   was recognised and IR was generated; returns False (generating no
   IR) otherwise.  'theInstr' must already be in the ARM encoding --
   Thumb callers pre-swizzle the bits (see
   decode_NEON_instruction_ARMv7_and_below).  'condT' carries the
   gating condition for Thumb, or IRTemp_INVALID for ARM. */
8384 static
8385 Bool dis_neon_load_or_store ( UInt theInstr,
8386 Bool isT, IRTemp condT )
8388 # define INSN(_bMax,_bMin) SLICE_UInt(theInstr, (_bMax), (_bMin))
/* Common encoding fields.  Note rD folds in the D bit (bit 22) to
   form the full 5-bit D-register number. */
8389 UInt bA = INSN(23,23);
8390 UInt fB = INSN(11,8);
8391 UInt bL = INSN(21,21);
8392 UInt rD = (INSN(22,22) << 4) | INSN(15,12);
8393 UInt rN = INSN(19,16);
8394 UInt rM = INSN(3,0);
8395 UInt N, size, i, j;
8396 UInt inc;
8397 UInt regs = 1;
/* Thumb must supply a gating condition; ARM NEON is unconditional. */
8399 if (isT) {
8400 vassert(condT != IRTemp_INVALID);
8401 } else {
8402 vassert(condT == IRTemp_INVALID);
8404 /* So now, if condT is not IRTemp_INVALID, we know we're
8405 dealing with Thumb code. */
/* Bit 20 must be zero for the encodings handled here. */
8407 if (INSN(20,20) != 0)
8408 return False;
/* Snapshot Rn and Rm up front: the writeback code at the end adds to
   these initial values, independent of the address stepping below. */
8410 IRTemp initialRn = newTemp(Ity_I32);
8411 assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
8413 IRTemp initialRm = newTemp(Ity_I32);
8414 assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
8416 /* There are 3 cases:
8417 (1) VSTn / VLDn (n-element structure from/to one lane)
8418 (2) VLDn (single element to all lanes)
8419 (3) VSTn / VLDn (multiple n-element structures)
8421 if (bA) {
8422 N = fB & 3;
8423 if ((fB >> 2) < 3) {
8424 /* ------------ Case (1) ------------
8425 VSTn / VLDn (n-element structure from/to one lane) */
8427 size = fB >> 2;
/* Derive the lane index 'i' and register spacing 'inc' from the
   element size and the index bits. */
8429 switch (size) {
8430 case 0: i = INSN(7,5); inc = 1; break;
8431 case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
8432 case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
8433 case 3: return False;
8434 default: vassert(0);
8437 IRTemp addr = newTemp(Ity_I32);
8438 assign(addr, mkexpr(initialRn));
8440 // go uncond
8441 if (condT != IRTemp_INVALID)
8442 mk_skip_over_T32_if_cond_is_false(condT);
8443 // now uncond
/* Emit the actual one-lane transfer. */
8445 if (bL)
8446 mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
8447 else
8448 mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
/* Disassembly printing only from here to the end of case (1). */
8449 DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << size);
8450 for (j = 0; j <= N; j++) {
8451 if (j)
8452 DIP(", ");
8453 DIP("d%u[%u]", rD + j * inc, i);
8455 DIP("}, [r%u]", rN);
8456 if (rM != 13 && rM != 15) {
8457 DIP(", r%u\n", rM);
8458 } else {
8459 DIP("%s\n", (rM != 15) ? "!" : "");
8461 } else {
8462 /* ------------ Case (2) ------------
8463 VLDn (single element to all lanes) */
8464 UInt r;
/* Only loads exist in this form (VLD1..VLD4 to-all-lanes). */
8465 if (bL == 0)
8466 return False;
8468 inc = INSN(5,5) + 1;
8469 size = INSN(7,6);
8471 /* size == 3 and size == 2 cases differ in alignment constraints */
8472 if (size == 3 && N == 3 && INSN(4,4) == 1)
8473 size = 2;
8475 if (size == 0 && N == 0 && INSN(4,4) == 1)
8476 return False;
8477 if (N == 2 && INSN(4,4) == 1)
8478 return False;
8479 if (size == 3)
8480 return False;
8482 // go uncond
8483 if (condT != IRTemp_INVALID)
8484 mk_skip_over_T32_if_cond_is_false(condT);
8485 // now uncond
8487 IRTemp addr = newTemp(Ity_I32);
8488 assign(addr, mkexpr(initialRn));
/* VLD1 to-all-lanes with bit 5 set replicates into two registers. */
8490 if (N == 0 && INSN(5,5))
8491 regs = 2;
/* For each destination register, load one element and Dup it across
   the whole 64-bit register; registers beyond the first use
   successive elements from memory. */
8493 for (r = 0; r < regs; r++) {
8494 switch (size) {
8495 case 0:
8496 putDRegI64(rD + r, unop(Iop_Dup8x8,
8497 loadLE(Ity_I8, mkexpr(addr))),
8498 IRTemp_INVALID);
8499 break;
8500 case 1:
8501 putDRegI64(rD + r, unop(Iop_Dup16x4,
8502 loadLE(Ity_I16, mkexpr(addr))),
8503 IRTemp_INVALID);
8504 break;
8505 case 2:
8506 putDRegI64(rD + r, unop(Iop_Dup32x2,
8507 loadLE(Ity_I32, mkexpr(addr))),
8508 IRTemp_INVALID);
8509 break;
8510 default:
8511 vassert(0);
8513 for (i = 1; i <= N; i++) {
8514 switch (size) {
8515 case 0:
8516 putDRegI64(rD + r + i * inc,
8517 unop(Iop_Dup8x8,
8518 loadLE(Ity_I8, binop(Iop_Add32,
8519 mkexpr(addr),
8520 mkU32(i * 1)))),
8521 IRTemp_INVALID);
8522 break;
8523 case 1:
8524 putDRegI64(rD + r + i * inc,
8525 unop(Iop_Dup16x4,
8526 loadLE(Ity_I16, binop(Iop_Add32,
8527 mkexpr(addr),
8528 mkU32(i * 2)))),
8529 IRTemp_INVALID);
8530 break;
8531 case 2:
8532 putDRegI64(rD + r + i * inc,
8533 unop(Iop_Dup32x2,
8534 loadLE(Ity_I32, binop(Iop_Add32,
8535 mkexpr(addr),
8536 mkU32(i * 4)))),
8537 IRTemp_INVALID);
8538 break;
8539 default:
8540 vassert(0);
/* Disassembly printing for case (2). */
8544 DIP("vld%u.%d {", N + 1, 8 << size);
8545 for (r = 0; r < regs; r++) {
8546 for (i = 0; i <= N; i++) {
8547 if (i || r)
8548 DIP(", ");
8549 DIP("d%u[]", rD + r + i * inc);
8552 DIP("}, [r%u]", rN);
8553 if (rM != 13 && rM != 15) {
8554 DIP(", r%u\n", rM);
8555 } else {
8556 DIP("%s\n", (rM != 15) ? "!" : "");
/* Writeback below is shared by cases (1) and (2): rM == 15 means no
   writeback, rM == 13 means post-increment by the transfer size, any
   other rM means post-increment by Rm. */
8559 /* Writeback. We're uncond here, so no condT-ing. */
8560 if (rM != 15) {
8561 if (rM == 13) {
8562 IRExpr* e = binop(Iop_Add32,
8563 mkexpr(initialRn),
8564 mkU32((1 << size) * (N + 1)));
8565 if (isT)
8566 putIRegT(rN, e, IRTemp_INVALID);
8567 else
8568 putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8569 } else {
8570 IRExpr* e = binop(Iop_Add32,
8571 mkexpr(initialRn),
8572 mkexpr(initialRm));
8573 if (isT)
8574 putIRegT(rN, e, IRTemp_INVALID);
8575 else
8576 putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8579 return True;
8580 } else {
8581 /* ------------ Case (3) ------------
8582 VSTn / VLDn (multiple n-element structures) */
8583 inc = (fB & 1) + 1;
/* Decode the register-list shape from fB.  N is the interleave
   degree minus one (so N==0 is VLD1/VST1, N==3 is VLD4/VST4); the
   bounds checks reject lists that would run off the end of the
   D-register file. */
8585 if (fB == BITS4(0,0,1,0) // Dd, Dd+1, Dd+2, Dd+3 inc = 1 regs = 4
8586 || fB == BITS4(0,1,1,0) // Dd, Dd+1, Dd+2 inc = 1 regs = 3
8587 || fB == BITS4(0,1,1,1) // Dd inc = 2 regs = 1
8588 || fB == BITS4(1,0,1,0)) { // Dd, Dd+1 inc = 1 regs = 2
8589 N = 0; // VLD1/VST1. 'inc' does not appear to have any
8590 // meaning for the VLD1/VST1 cases. 'regs' is the number of
8591 // registers involved.
8592 if (rD + regs > 32) return False;
8594 else
8595 if (fB == BITS4(0,0,1,1) // Dd, Dd+1, Dd+2, Dd+3 inc=2 regs = 2
8596 || fB == BITS4(1,0,0,0) // Dd, Dd+1 inc=1 regs = 1
8597 || fB == BITS4(1,0,0,1)) { // Dd, Dd+2 inc=2 regs = 1
8598 N = 1; // VLD2/VST2. 'regs' is the number of register-pairs involved
8599 if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
8600 if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
8601 if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
8602 } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
8603 N = 2; // VLD3/VST3
8604 if (inc == 1 && rD + 2 >= 32) return False;
8605 if (inc == 2 && rD + 4 >= 32) return False;
8606 } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
8607 N = 3; // VLD4/VST4
8608 if (inc == 1 && rD + 3 >= 32) return False;
8609 if (inc == 2 && rD + 6 >= 32) return False;
8610 } else {
8611 return False;
/* For the multi-register VLD1/VST1 and VLD2/VST2 forms, fix up
   'regs' (it defaults to 1). */
8614 if (N == 1 && fB == BITS4(0,0,1,1)) {
8615 regs = 2;
8616 } else if (N == 0) {
8617 if (fB == BITS4(1,0,1,0)) {
8618 regs = 2;
8619 } else if (fB == BITS4(0,1,1,0)) {
8620 regs = 3;
8621 } else if (fB == BITS4(0,0,1,0)) {
8622 regs = 4;
8626 size = INSN(7,6);
8627 if (N == 0 && size == 3)
8628 size = 2;
8629 if (size == 3)
8630 return False;
8632 // go uncond
8633 if (condT != IRTemp_INVALID)
8634 mk_skip_over_T32_if_cond_is_false(condT);
8635 // now uncond
8637 IRTemp addr = newTemp(Ity_I32);
8638 assign(addr, mkexpr(initialRn));
8640 if (N == 0 /* No interleaving -- VLD1/VST1 */) {
8641 UInt r;
8642 vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
8643 /* inc has no relevance here */
8644 for (r = 0; r < regs; r++) {
8645 if (bL)
8646 putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
8647 else
8648 storeLE(mkexpr(addr), getDRegI64(rD+r));
8649 IRTemp tmp = newTemp(Ity_I32);
8650 assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
8651 addr = tmp;
8654 else
8655 if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
8656 vassert( (regs == 1 && (inc == 1 || inc == 2))
8657 || (regs == 2 && inc == 2) );
8658 // Make 'nregs' be the number of registers and 'regstep'
8659 // equal the actual register-step. The ARM encoding, using 'regs'
8660 // and 'inc', is bizarre. After this, we have:
8661 // Dd, Dd+1 regs = 1, inc = 1, nregs = 2, regstep = 1
8662 // Dd, Dd+2 regs = 1, inc = 2, nregs = 2, regstep = 2
8663 // Dd, Dd+1, Dd+2, Dd+3 regs = 2, inc = 2, nregs = 4, regstep = 1
8664 UInt nregs = 2;
8665 UInt regstep = 1;
8666 if (regs == 1 && inc == 1) {
8667 /* nothing */
8668 } else if (regs == 1 && inc == 2) {
8669 regstep = 2;
8670 } else if (regs == 2 && inc == 2) {
8671 nregs = 4;
8672 } else {
8673 vassert(0);
8675 // 'a' is address,
8676 // 'di' is interleaved data, 'du' is uninterleaved data
8677 if (nregs == 2) {
8678 IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8679 IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8680 IRTemp di0 = newTemp(Ity_I64);
8681 IRTemp di1 = newTemp(Ity_I64);
8682 IRTemp du0 = newTemp(Ity_I64);
8683 IRTemp du1 = newTemp(Ity_I64);
8684 if (bL) {
8685 assign(di0, loadLE(Ity_I64, a0));
8686 assign(di1, loadLE(Ity_I64, a1));
8687 math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
8688 putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8689 putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8690 } else {
8691 assign(du0, getDRegI64(rD + 0 * regstep));
8692 assign(du1, getDRegI64(rD + 1 * regstep));
8693 math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
8694 storeLE(a0, mkexpr(di0));
8695 storeLE(a1, mkexpr(di1));
8697 IRTemp tmp = newTemp(Ity_I32);
8698 assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
8699 addr = tmp;
8700 } else {
8701 vassert(nregs == 4);
8702 vassert(regstep == 1);
8703 IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8704 IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8705 IRExpr* a2 = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8706 IRExpr* a3 = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8707 IRTemp di0 = newTemp(Ity_I64);
8708 IRTemp di1 = newTemp(Ity_I64);
8709 IRTemp di2 = newTemp(Ity_I64);
8710 IRTemp di3 = newTemp(Ity_I64);
8711 IRTemp du0 = newTemp(Ity_I64);
8712 IRTemp du1 = newTemp(Ity_I64);
8713 IRTemp du2 = newTemp(Ity_I64);
8714 IRTemp du3 = newTemp(Ity_I64);
8715 if (bL) {
8716 assign(di0, loadLE(Ity_I64, a0));
8717 assign(di1, loadLE(Ity_I64, a1));
8718 assign(di2, loadLE(Ity_I64, a2));
8719 assign(di3, loadLE(Ity_I64, a3));
8720 // Note spooky interleaving: du0, du2, di0, di1 etc
8721 math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
8722 math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
8723 putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8724 putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8725 putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
8726 putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
8727 } else {
8728 assign(du0, getDRegI64(rD + 0 * regstep));
8729 assign(du1, getDRegI64(rD + 1 * regstep));
8730 assign(du2, getDRegI64(rD + 2 * regstep));
8731 assign(du3, getDRegI64(rD + 3 * regstep));
8732 // Note spooky interleaving: du0, du2, di0, di1 etc
8733 math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
8734 math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
8735 storeLE(a0, mkexpr(di0));
8736 storeLE(a1, mkexpr(di1));
8737 storeLE(a2, mkexpr(di2));
8738 storeLE(a3, mkexpr(di3));
8741 IRTemp tmp = newTemp(Ity_I32);
8742 assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8743 addr = tmp;
8746 else
8747 if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
8748 // Dd, Dd+1, Dd+2 regs = 1, inc = 1
8749 // Dd, Dd+2, Dd+4 regs = 1, inc = 2
8750 vassert(regs == 1 && (inc == 1 || inc == 2));
8751 IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8752 IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8753 IRExpr* a2 = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8754 IRTemp di0 = newTemp(Ity_I64);
8755 IRTemp di1 = newTemp(Ity_I64);
8756 IRTemp di2 = newTemp(Ity_I64);
8757 IRTemp du0 = newTemp(Ity_I64);
8758 IRTemp du1 = newTemp(Ity_I64);
8759 IRTemp du2 = newTemp(Ity_I64);
8760 if (bL) {
8761 assign(di0, loadLE(Ity_I64, a0));
8762 assign(di1, loadLE(Ity_I64, a1));
8763 assign(di2, loadLE(Ity_I64, a2));
8764 math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
8765 putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8766 putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8767 putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8768 } else {
8769 assign(du0, getDRegI64(rD + 0 * inc));
8770 assign(du1, getDRegI64(rD + 1 * inc));
8771 assign(du2, getDRegI64(rD + 2 * inc));
8772 math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
8773 storeLE(a0, mkexpr(di0));
8774 storeLE(a1, mkexpr(di1));
8775 storeLE(a2, mkexpr(di2));
8777 IRTemp tmp = newTemp(Ity_I32);
8778 assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
8779 addr = tmp;
8781 else
8782 if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
8783 // Dd, Dd+1, Dd+2, Dd+3 regs = 1, inc = 1
8784 // Dd, Dd+2, Dd+4, Dd+6 regs = 1, inc = 2
8785 vassert(regs == 1 && (inc == 1 || inc == 2));
8786 IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8787 IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8788 IRExpr* a2 = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8789 IRExpr* a3 = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8790 IRTemp di0 = newTemp(Ity_I64);
8791 IRTemp di1 = newTemp(Ity_I64);
8792 IRTemp di2 = newTemp(Ity_I64);
8793 IRTemp di3 = newTemp(Ity_I64);
8794 IRTemp du0 = newTemp(Ity_I64);
8795 IRTemp du1 = newTemp(Ity_I64);
8796 IRTemp du2 = newTemp(Ity_I64);
8797 IRTemp du3 = newTemp(Ity_I64);
8798 if (bL) {
8799 assign(di0, loadLE(Ity_I64, a0));
8800 assign(di1, loadLE(Ity_I64, a1));
8801 assign(di2, loadLE(Ity_I64, a2));
8802 assign(di3, loadLE(Ity_I64, a3));
8803 math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
8804 di0, di1, di2, di3, 1 << size);
8805 putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8806 putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8807 putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8808 putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
8809 } else {
8810 assign(du0, getDRegI64(rD + 0 * inc));
8811 assign(du1, getDRegI64(rD + 1 * inc));
8812 assign(du2, getDRegI64(rD + 2 * inc));
8813 assign(du3, getDRegI64(rD + 3 * inc));
8814 math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
8815 du0, du1, du2, du3, 1 << size);
8816 storeLE(a0, mkexpr(di0));
8817 storeLE(a1, mkexpr(di1));
8818 storeLE(a2, mkexpr(di2));
8819 storeLE(a3, mkexpr(di3));
8821 IRTemp tmp = newTemp(Ity_I32);
8822 assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8823 addr = tmp;
8825 else {
8826 vassert(0);
/* Writeback for case (3): rM == 15 no writeback, rM == 13 increment
   by the total bytes transferred, otherwise increment by Rm. */
8829 /* Writeback */
8830 if (rM != 15) {
8831 IRExpr* e;
8832 if (rM == 13) {
8833 e = binop(Iop_Add32, mkexpr(initialRn),
8834 mkU32(8 * (N + 1) * regs));
8835 } else {
8836 e = binop(Iop_Add32, mkexpr(initialRn),
8837 mkexpr(initialRm));
8839 if (isT)
8840 putIRegT(rN, e, IRTemp_INVALID);
8841 else
8842 putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
/* Disassembly printing for case (3). */
8845 DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
8846 if ((inc == 1 && regs * (N + 1) > 1)
8847 || (inc == 2 && regs > 1 && N > 0)) {
8848 DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
8849 } else {
8850 UInt r;
8851 for (r = 0; r < regs; r++) {
8852 for (i = 0; i <= N; i++) {
8853 if (i || r)
8854 DIP(", ");
8855 DIP("d%u", rD + r + i * inc);
8859 DIP("}, [r%u]", rN);
8860 if (rM != 13 && rM != 15) {
8861 DIP(", r%u\n", rM);
8862 } else {
8863 DIP("%s\n", (rM != 15) ? "!" : "");
8865 return True;
8867 # undef INSN
8871 /*------------------------------------------------------------*/
8872 /*--- NEON, top level control ---*/
8873 /*------------------------------------------------------------*/
8875 /* Both ARM and Thumb */
8877 /* Translate a NEON instruction. If successful, returns
8878 True and *dres may or may not be updated. If failure, returns
8879 False and doesn't change *dres nor create any IR.
8881 The Thumb and ARM encodings are similar for the 24 bottom bits, but
8882 the top 8 bits are slightly different. In both cases, the caller
8883 must pass the entire 32 bits. Callers may pass any instruction;
8884 this ignores non-NEON ones.
8886 Caller must supply an IRTemp 'condT' holding the gating condition,
8887 or IRTemp_INVALID indicating the insn is always executed. In ARM
8888 code, this must always be IRTemp_INVALID because NEON insns are
8889 unconditional for ARM.
8891 Finally, the caller must indicate whether this occurs in ARM or in
8892 Thumb code.
8894 This only handles NEON for ARMv7 and below. The NEON extensions
8895 for v8 are handled by decode_V8_instruction.
/* Top-level NEON dispatcher for ARMv7-and-below encodings: classifies
   insn32 as data-processing or load/store, rewrites Thumb encodings
   into the equivalent ARM bit layout, and forwards to the relevant
   sub-handler.  Returns False without generating IR if the insn is
   not NEON. */
8897 static Bool decode_NEON_instruction_ARMv7_and_below (
8898 /*MOD*/DisResult* dres,
8899 UInt insn32,
8900 IRTemp condT,
8901 Bool isT
8904 # define INSN(_bMax,_bMin) SLICE_UInt(insn32, (_bMax), (_bMin))
8906 /* There are two kinds of instruction to deal with: load/store and
8907 data processing. In each case, in ARM mode we merely identify
8908 the kind, and pass it on to the relevant sub-handler. In Thumb
8909 mode we identify the kind, swizzle the bits around to make it
8910 have the same encoding as in ARM, and hand it on to the
8911 sub-handler.
8914 /* In ARM mode, NEON instructions can't be conditional. */
8915 if (!isT)
8916 vassert(condT == IRTemp_INVALID);
8918 /* Data processing:
8919 Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
8920 ARM: 1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
8922 if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
8923 // ARM, DP
8924 return dis_neon_data_processing(INSN(31,0), condT);
8926 if (isT && INSN(31,29) == BITS3(1,1,1)
8927 && INSN(27,24) == BITS4(1,1,1,1)) {
8928 // Thumb, DP
/* Rebuild the ARM encoding: keep bits 23:0, move the Thumb U bit
   (bit 28) into ARM position 24, and force the ARM DP prefix. */
8929 UInt reformatted = INSN(23,0);
8930 reformatted |= (((UInt)INSN(28,28)) << 24); // U bit
8931 reformatted |= (((UInt)BITS7(1,1,1,1,0,0,1)) << 25);
8932 return dis_neon_data_processing(reformatted, condT);
8935 /* Load/store:
8936 Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
8937 ARM: 1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
8939 if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
8940 // ARM, memory
8941 return dis_neon_load_or_store(INSN(31,0), isT, condT);
8943 if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
// Thumb, memory: replace the top byte with the ARM prefix.
8944 UInt reformatted = INSN(23,0);
8945 reformatted |= (((UInt)BITS8(1,1,1,1,0,1,0,0)) << 24);
8946 return dis_neon_load_or_store(reformatted, isT, condT);
8949 /* Doesn't match. */
8950 return False;
8952 # undef INSN
8956 /*------------------------------------------------------------*/
8957 /*--- V6 MEDIA instructions ---*/
8958 /*------------------------------------------------------------*/
8960 /* Both ARM and Thumb */
8962 /* Translate a V6 media instruction. If successful, returns
8963 True and *dres may or may not be updated. If failure, returns
8964 False and doesn't change *dres nor create any IR.
8966 The Thumb and ARM encodings are completely different. In Thumb
8967 mode, the caller must pass the entire 32 bits. In ARM mode it must
8968 pass the lower 28 bits. Apart from that, callers may pass any
8969 instruction; this function ignores anything it doesn't recognise.
8971 Caller must supply an IRTemp 'condT' holding the gating condition,
8972 or IRTemp_INVALID indicating the insn is always executed.
8974 Caller must also supply an ARMCondcode 'conq'. This is only used
8975 for debug printing, no other purpose. For ARM, this is simply the
8976 top 4 bits of the original instruction. For Thumb, the condition
8977 is not (really) known until run time, and so ARMCondAL should be
8978 passed, only so that printing of these instructions does not show
8979 any condition.
8981 Finally, the caller must indicate whether this occurs in ARM or in
8982 Thumb code.
8984 static Bool decode_V6MEDIA_instruction (
8985 /*MOD*/DisResult* dres,
8986 UInt insnv6m,
8987 IRTemp condT,
8988 ARMCondcode conq,
8989 Bool isT
8992 # define INSNA(_bMax,_bMin) SLICE_UInt(insnv6m, (_bMax), (_bMin))
8993 # define INSNT0(_bMax,_bMin) SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
8994 (_bMax), (_bMin) )
8995 # define INSNT1(_bMax,_bMin) SLICE_UInt( ((insnv6m >> 0) & 0xFFFF), \
8996 (_bMax), (_bMin) )
8997 HChar dis_buf[128];
8998 dis_buf[0] = 0;
9000 if (isT) {
9001 vassert(conq == ARMCondAL);
9002 } else {
9003 vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
9004 vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
9007 /* ----------- smulbb, smulbt, smultb, smultt ----------- */
9009 UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
9010 Bool gate = False;
9012 if (isT) {
9013 if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
9014 && INSNT1(7,6) == BITS2(0,0)) {
9015 regD = INSNT1(11,8);
9016 regM = INSNT1(3,0);
9017 regN = INSNT0(3,0);
9018 bitM = INSNT1(4,4);
9019 bitN = INSNT1(5,5);
9020 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9021 gate = True;
9023 } else {
9024 if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
9025 BITS4(0,0,0,0) == INSNA(15,12) &&
9026 BITS4(1,0,0,0) == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
9027 regD = INSNA(19,16);
9028 regM = INSNA(11,8);
9029 regN = INSNA(3,0);
9030 bitM = INSNA(6,6);
9031 bitN = INSNA(5,5);
9032 if (regD != 15 && regN != 15 && regM != 15)
9033 gate = True;
9037 if (gate) {
9038 IRTemp srcN = newTemp(Ity_I32);
9039 IRTemp srcM = newTemp(Ity_I32);
9040 IRTemp res = newTemp(Ity_I32);
9042 assign( srcN, binop(Iop_Sar32,
9043 binop(Iop_Shl32,
9044 isT ? getIRegT(regN) : getIRegA(regN),
9045 mkU8(bitN ? 0 : 16)), mkU8(16)) );
9046 assign( srcM, binop(Iop_Sar32,
9047 binop(Iop_Shl32,
9048 isT ? getIRegT(regM) : getIRegA(regM),
9049 mkU8(bitM ? 0 : 16)), mkU8(16)) );
9050 assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
9052 if (isT)
9053 putIRegT( regD, mkexpr(res), condT );
9054 else
9055 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9057 DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
9058 nCC(conq), regD, regN, regM );
9059 return True;
9061 /* fall through */
9064 /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
9065 /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
9067 UInt regD = 99, regN = 99, regM = 99, bitM = 0;
9068 Bool gate = False;
9070 if (isT) {
9071 if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
9072 && INSNT1(7,5) == BITS3(0,0,0)) {
9073 regN = INSNT0(3,0);
9074 regD = INSNT1(11,8);
9075 regM = INSNT1(3,0);
9076 bitM = INSNT1(4,4);
9077 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9078 gate = True;
9080 } else {
9081 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
9082 INSNA(15,12) == BITS4(0,0,0,0) &&
9083 (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
9084 regD = INSNA(19,16);
9085 regN = INSNA(3,0);
9086 regM = INSNA(11,8);
9087 bitM = INSNA(6,6);
9088 if (regD != 15 && regN != 15 && regM != 15)
9089 gate = True;
9093 if (gate) {
9094 IRTemp irt_prod = newTemp(Ity_I64);
9096 assign( irt_prod,
9097 binop(Iop_MullS32,
9098 isT ? getIRegT(regN) : getIRegA(regN),
9099 binop(Iop_Sar32,
9100 binop(Iop_Shl32,
9101 isT ? getIRegT(regM) : getIRegA(regM),
9102 mkU8(bitM ? 0 : 16)),
9103 mkU8(16))) );
9105 IRExpr* ire_result = binop(Iop_Or32,
9106 binop( Iop_Shl32,
9107 unop(Iop_64HIto32, mkexpr(irt_prod)),
9108 mkU8(16) ),
9109 binop( Iop_Shr32,
9110 unop(Iop_64to32, mkexpr(irt_prod)),
9111 mkU8(16) ) );
9113 if (isT)
9114 putIRegT( regD, ire_result, condT );
9115 else
9116 putIRegA( regD, ire_result, condT, Ijk_Boring );
9118 DIP("smulw%c%s r%u, r%u, r%u\n",
9119 bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
9120 return True;
9122 /* fall through */
9125 /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
9126 /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
9128 UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
9129 Bool tbform = False;
9130 Bool gate = False;
9132 if (isT) {
9133 if (INSNT0(15,4) == 0xEAC
9134 && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
9135 regN = INSNT0(3,0);
9136 regD = INSNT1(11,8);
9137 regM = INSNT1(3,0);
9138 imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9139 shift_type = (INSNT1(5,5) << 1) | 0;
9140 tbform = (INSNT1(5,5) == 0) ? False : True;
9141 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9142 gate = True;
9144 } else {
9145 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
9146 INSNA(5,4) == BITS2(0,1) /* &&
9147 (INSNA(6,6) == 0 || INSNA(6,6) == 1)
9148 This last bit with INSNA(6,6) is correct, but gcc 8 complains
9149 (correctly) that it is always true. So I commented it out
9150 to keep gcc quiet. */ ) {
9151 regD = INSNA(15,12);
9152 regN = INSNA(19,16);
9153 regM = INSNA(3,0);
9154 imm5 = INSNA(11,7);
9155 shift_type = (INSNA(6,6) << 1) | 0;
9156 tbform = (INSNA(6,6) == 0) ? False : True;
9157 if (regD != 15 && regN != 15 && regM != 15)
9158 gate = True;
9162 if (gate) {
9163 IRTemp irt_regM = newTemp(Ity_I32);
9164 IRTemp irt_regM_shift = newTemp(Ity_I32);
9165 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9166 compute_result_and_C_after_shift_by_imm5(
9167 dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
9169 UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
9170 IRExpr* ire_result
9171 = binop( Iop_Or32,
9172 binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
9173 binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
9174 unop(Iop_Not32, mkU32(mask))) );
9176 if (isT)
9177 putIRegT( regD, ire_result, condT );
9178 else
9179 putIRegA( regD, ire_result, condT, Ijk_Boring );
9181 DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
9182 nCC(conq), regD, regN, regM, dis_buf );
9184 return True;
9186 /* fall through */
9189 /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9191 UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9192 Bool gate = False;
9194 if (isT) {
9195 if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
9196 && INSNT0(4,4) == 0
9197 && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9198 regD = INSNT1(11,8);
9199 regN = INSNT0(3,0);
9200 shift_type = (INSNT0(5,5) << 1) | 0;
9201 imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9202 sat_imm = INSNT1(4,0);
9203 if (!isBadRegT(regD) && !isBadRegT(regN))
9204 gate = True;
9205 if (shift_type == BITS2(1,0) && imm5 == 0)
9206 gate = False;
9208 } else {
9209 if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
9210 INSNA(5,4) == BITS2(0,1)) {
9211 regD = INSNA(15,12);
9212 regN = INSNA(3,0);
9213 shift_type = (INSNA(6,6) << 1) | 0;
9214 imm5 = INSNA(11,7);
9215 sat_imm = INSNA(20,16);
9216 if (regD != 15 && regN != 15)
9217 gate = True;
9221 if (gate) {
9222 IRTemp irt_regN = newTemp(Ity_I32);
9223 IRTemp irt_regN_shift = newTemp(Ity_I32);
9224 IRTemp irt_sat_Q = newTemp(Ity_I32);
9225 IRTemp irt_result = newTemp(Ity_I32);
9227 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9228 compute_result_and_C_after_shift_by_imm5(
9229 dis_buf, &irt_regN_shift, NULL,
9230 irt_regN, shift_type, imm5, regN );
9232 armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
9233 or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9235 if (isT)
9236 putIRegT( regD, mkexpr(irt_result), condT );
9237 else
9238 putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9240 DIP("usat%s r%u, #0x%04x, %s\n",
9241 nCC(conq), regD, imm5, dis_buf);
9242 return True;
9244 /* fall through */
9247 /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9249 UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9250 Bool gate = False;
9252 if (isT) {
9253 if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9254 && INSNT0(4,4) == 0
9255 && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9256 regD = INSNT1(11,8);
9257 regN = INSNT0(3,0);
9258 shift_type = (INSNT0(5,5) << 1) | 0;
9259 imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9260 sat_imm = INSNT1(4,0) + 1;
9261 if (!isBadRegT(regD) && !isBadRegT(regN))
9262 gate = True;
9263 if (shift_type == BITS2(1,0) && imm5 == 0)
9264 gate = False;
9266 } else {
9267 if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
9268 INSNA(5,4) == BITS2(0,1)) {
9269 regD = INSNA(15,12);
9270 regN = INSNA(3,0);
9271 shift_type = (INSNA(6,6) << 1) | 0;
9272 imm5 = INSNA(11,7);
9273 sat_imm = INSNA(20,16) + 1;
9274 if (regD != 15 && regN != 15)
9275 gate = True;
9279 if (gate) {
9280 IRTemp irt_regN = newTemp(Ity_I32);
9281 IRTemp irt_regN_shift = newTemp(Ity_I32);
9282 IRTemp irt_sat_Q = newTemp(Ity_I32);
9283 IRTemp irt_result = newTemp(Ity_I32);
9285 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9286 compute_result_and_C_after_shift_by_imm5(
9287 dis_buf, &irt_regN_shift, NULL,
9288 irt_regN, shift_type, imm5, regN );
9290 armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
9291 or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9293 if (isT)
9294 putIRegT( regD, mkexpr(irt_result), condT );
9295 else
9296 putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9298 DIP( "ssat%s r%u, #0x%04x, %s\n",
9299 nCC(conq), regD, imm5, dis_buf);
9300 return True;
9302 /* fall through */
9305 /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
9307 UInt regD = 99, regN = 99, sat_imm = 99;
9308 Bool gate = False;
9310 if (isT) {
9311 if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9312 && INSNT0(5,4) == BITS2(1,0)
9313 && INSNT1(15,12) == BITS4(0,0,0,0)
9314 && INSNT1(7,4) == BITS4(0,0,0,0)) {
9315 regD = INSNT1(11,8);
9316 regN = INSNT0(3,0);
9317 sat_imm = INSNT1(3,0) + 1;
9318 if (!isBadRegT(regD) && !isBadRegT(regN))
9319 gate = True;
9321 } else {
9322 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
9323 INSNA(11,4) == BITS8(1,1,1,1,0,0,1,1)) {
9324 regD = INSNA(15,12);
9325 regN = INSNA(3,0);
9326 sat_imm = INSNA(19,16) + 1;
9327 if (regD != 15 && regN != 15)
9328 gate = True;
9332 if (gate) {
9333 IRTemp irt_regN = newTemp(Ity_I32);
9334 IRTemp irt_regN_lo = newTemp(Ity_I32);
9335 IRTemp irt_regN_hi = newTemp(Ity_I32);
9336 IRTemp irt_Q_lo = newTemp(Ity_I32);
9337 IRTemp irt_Q_hi = newTemp(Ity_I32);
9338 IRTemp irt_res_lo = newTemp(Ity_I32);
9339 IRTemp irt_res_hi = newTemp(Ity_I32);
9341 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9342 assign( irt_regN_lo,
9343 binop( Iop_Sar32,
9344 binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9345 mkU8(16)) );
9346 assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9348 armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
9349 or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9351 armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
9352 or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9354 IRExpr* ire_result
9355 = binop(Iop_Or32,
9356 binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
9357 binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
9358 if (isT)
9359 putIRegT( regD, ire_result, condT );
9360 else
9361 putIRegA( regD, ire_result, condT, Ijk_Boring );
9363 DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9364 return True;
9366 /* fall through */
9369 /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
9371 UInt regD = 99, regN = 99, sat_imm = 99;
9372 Bool gate = False;
9374 if (isT) {
9375 if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
9376 regN = INSNT0(3,0);
9377 regD = INSNT1(11,8);
9378 sat_imm = INSNT1(3,0);
9379 if (!isBadRegT(regD) && !isBadRegT(regN))
9380 gate = True;
9382 } else {
9383 if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
9384 INSNA(11,8) == BITS4(1,1,1,1) &&
9385 INSNA(7,4) == BITS4(0,0,1,1)) {
9386 regD = INSNA(15,12);
9387 regN = INSNA(3,0);
9388 sat_imm = INSNA(19,16);
9389 if (regD != 15 && regN != 15)
9390 gate = True;
9394 if (gate) {
9395 IRTemp irt_regN = newTemp(Ity_I32);
9396 IRTemp irt_regN_lo = newTemp(Ity_I32);
9397 IRTemp irt_regN_hi = newTemp(Ity_I32);
9398 IRTemp irt_Q_lo = newTemp(Ity_I32);
9399 IRTemp irt_Q_hi = newTemp(Ity_I32);
9400 IRTemp irt_res_lo = newTemp(Ity_I32);
9401 IRTemp irt_res_hi = newTemp(Ity_I32);
9403 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9404 assign( irt_regN_lo, binop( Iop_Sar32,
9405 binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9406 mkU8(16)) );
9407 assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9409 armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
9410 or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9412 armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
9413 or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9415 IRExpr* ire_result = binop( Iop_Or32,
9416 binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
9417 mkexpr(irt_res_lo) );
9419 if (isT)
9420 putIRegT( regD, ire_result, condT );
9421 else
9422 putIRegA( regD, ire_result, condT, Ijk_Boring );
9424 DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9425 return True;
9427 /* fall through */
9430 /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9432 UInt regD = 99, regN = 99, regM = 99;
9433 Bool gate = False;
9435 if (isT) {
9436 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9437 regN = INSNT0(3,0);
9438 regD = INSNT1(11,8);
9439 regM = INSNT1(3,0);
9440 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9441 gate = True;
9443 } else {
9444 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9445 INSNA(11,8) == BITS4(1,1,1,1) &&
9446 INSNA(7,4) == BITS4(0,0,0,1)) {
9447 regD = INSNA(15,12);
9448 regN = INSNA(19,16);
9449 regM = INSNA(3,0);
9450 if (regD != 15 && regN != 15 && regM != 15)
9451 gate = True;
9455 if (gate) {
9456 IRTemp rNt = newTemp(Ity_I32);
9457 IRTemp rMt = newTemp(Ity_I32);
9458 IRTemp res = newTemp(Ity_I32);
9459 IRTemp reso = newTemp(Ity_I32);
9461 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9462 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9464 assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9465 if (isT)
9466 putIRegT( regD, mkexpr(res), condT );
9467 else
9468 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9470 assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
9471 set_GE_32_10_from_bits_31_15(reso, condT);
9473 DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9474 return True;
9476 /* fall through */
9479 /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9481 UInt regD = 99, regN = 99, regM = 99;
9482 Bool gate = False;
9484 if (isT) {
9485 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9486 regN = INSNT0(3,0);
9487 regD = INSNT1(11,8);
9488 regM = INSNT1(3,0);
9489 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9490 gate = True;
9492 } else {
9493 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9494 INSNA(11,8) == BITS4(1,1,1,1) &&
9495 INSNA(7,4) == BITS4(0,0,0,1)) {
9496 regD = INSNA(15,12);
9497 regN = INSNA(19,16);
9498 regM = INSNA(3,0);
9499 if (regD != 15 && regN != 15 && regM != 15)
9500 gate = True;
9504 if (gate) {
9505 IRTemp rNt = newTemp(Ity_I32);
9506 IRTemp rMt = newTemp(Ity_I32);
9507 IRTemp res = newTemp(Ity_I32);
9508 IRTemp reso = newTemp(Ity_I32);
9510 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9511 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9513 assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9514 if (isT)
9515 putIRegT( regD, mkexpr(res), condT );
9516 else
9517 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9519 assign(reso, unop(Iop_Not32,
9520 binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
9521 set_GE_32_10_from_bits_31_15(reso, condT);
9523 DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9524 return True;
9526 /* fall through */
9529 /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
9531 UInt regD = 99, regN = 99, regM = 99;
9532 Bool gate = False;
9534 if (isT) {
9535 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9536 regN = INSNT0(3,0);
9537 regD = INSNT1(11,8);
9538 regM = INSNT1(3,0);
9539 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9540 gate = True;
9542 } else {
9543 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9544 INSNA(11,8) == BITS4(1,1,1,1) &&
9545 INSNA(7,4) == BITS4(0,1,1,1)) {
9546 regD = INSNA(15,12);
9547 regN = INSNA(19,16);
9548 regM = INSNA(3,0);
9549 if (regD != 15 && regN != 15 && regM != 15)
9550 gate = True;
9554 if (gate) {
9555 IRTemp rNt = newTemp(Ity_I32);
9556 IRTemp rMt = newTemp(Ity_I32);
9557 IRTemp res = newTemp(Ity_I32);
9558 IRTemp reso = newTemp(Ity_I32);
9560 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9561 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9563 assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9564 if (isT)
9565 putIRegT( regD, mkexpr(res), condT );
9566 else
9567 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9569 assign(reso, unop(Iop_Not32,
9570 binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
9571 set_GE_32_10_from_bits_31_15(reso, condT);
9573 DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9574 return True;
9576 /* fall through */
9579 /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
9581 UInt regD = 99, regN = 99, regM = 99;
9582 Bool gate = False;
9584 if (isT) {
9585 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9586 regN = INSNT0(3,0);
9587 regD = INSNT1(11,8);
9588 regM = INSNT1(3,0);
9589 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9590 gate = True;
9592 } else {
9593 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9594 INSNA(11,8) == BITS4(1,1,1,1) &&
9595 INSNA(7,4) == BITS4(0,1,1,1)) {
9596 regD = INSNA(15,12);
9597 regN = INSNA(19,16);
9598 regM = INSNA(3,0);
9599 if (regD != 15 && regN != 15 && regM != 15)
9600 gate = True;
9604 if (gate) {
9605 IRTemp rNt = newTemp(Ity_I32);
9606 IRTemp rMt = newTemp(Ity_I32);
9607 IRTemp res = newTemp(Ity_I32);
9608 IRTemp reso = newTemp(Ity_I32);
9610 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9611 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9613 assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9614 if (isT)
9615 putIRegT( regD, mkexpr(res), condT );
9616 else
9617 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9619 assign(reso, unop(Iop_Not32,
9620 binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
9621 set_GE_32_10_from_bits_31_15(reso, condT);
9623 DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9624 return True;
9626 /* fall through */
9629 /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
9631 UInt regD = 99, regN = 99, regM = 99;
9632 Bool gate = False;
9634 if (isT) {
9635 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9636 regN = INSNT0(3,0);
9637 regD = INSNT1(11,8);
9638 regM = INSNT1(3,0);
9639 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9640 gate = True;
9642 } else {
9643 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9644 INSNA(11,8) == BITS4(1,1,1,1) &&
9645 (INSNA(7,4) == BITS4(1,0,0,1))) {
9646 regD = INSNA(15,12);
9647 regN = INSNA(19,16);
9648 regM = INSNA(3,0);
9649 if (regD != 15 && regN != 15 && regM != 15)
9650 gate = True;
9654 if (gate) {
9655 IRTemp rNt = newTemp(Ity_I32);
9656 IRTemp rMt = newTemp(Ity_I32);
9657 IRTemp res = newTemp(Ity_I32);
9658 IRTemp reso = newTemp(Ity_I32);
9660 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9661 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9663 assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9664 if (isT)
9665 putIRegT( regD, mkexpr(res), condT );
9666 else
9667 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9669 assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9670 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9672 DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9673 return True;
9675 /* fall through */
9678 /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9680 UInt regD = 99, regN = 99, regM = 99;
9681 Bool gate = False;
9683 if (isT) {
9684 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9685 regN = INSNT0(3,0);
9686 regD = INSNT1(11,8);
9687 regM = INSNT1(3,0);
9688 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9689 gate = True;
9691 } else {
9692 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9693 INSNA(11,8) == BITS4(1,1,1,1) &&
9694 (INSNA(7,4) == BITS4(1,0,0,1))) {
9695 regD = INSNA(15,12);
9696 regN = INSNA(19,16);
9697 regM = INSNA(3,0);
9698 if (regD != 15 && regN != 15 && regM != 15)
9699 gate = True;
9703 if (gate) {
9704 IRTemp rNt = newTemp(Ity_I32);
9705 IRTemp rMt = newTemp(Ity_I32);
9706 IRTemp res = newTemp(Ity_I32);
9707 IRTemp reso = newTemp(Ity_I32);
9709 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9710 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9712 assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9713 if (isT)
9714 putIRegT( regD, mkexpr(res), condT );
9715 else
9716 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9718 assign(reso, unop(Iop_Not32,
9719 binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
9720 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9722 DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9723 return True;
9725 /* fall through */
9728 /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9730 UInt regD = 99, regN = 99, regM = 99;
9731 Bool gate = False;
9733 if (isT) {
9734 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9735 regN = INSNT0(3,0);
9736 regD = INSNT1(11,8);
9737 regM = INSNT1(3,0);
9738 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9739 gate = True;
9741 } else {
9742 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9743 INSNA(11,8) == BITS4(1,1,1,1) &&
9744 (INSNA(7,4) == BITS4(1,1,1,1))) {
9745 regD = INSNA(15,12);
9746 regN = INSNA(19,16);
9747 regM = INSNA(3,0);
9748 if (regD != 15 && regN != 15 && regM != 15)
9749 gate = True;
9753 if (gate) {
9754 IRTemp rNt = newTemp(Ity_I32);
9755 IRTemp rMt = newTemp(Ity_I32);
9756 IRTemp res = newTemp(Ity_I32);
9757 IRTemp reso = newTemp(Ity_I32);
9759 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9760 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9762 assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9763 if (isT)
9764 putIRegT( regD, mkexpr(res), condT );
9765 else
9766 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9768 assign(reso, unop(Iop_Not32,
9769 binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
9770 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9772 DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9773 return True;
9775 /* fall through */
9778 /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9780 UInt regD = 99, regN = 99, regM = 99;
9781 Bool gate = False;
9783 if (isT) {
9784 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9785 regN = INSNT0(3,0);
9786 regD = INSNT1(11,8);
9787 regM = INSNT1(3,0);
9788 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9789 gate = True;
9791 } else {
9792 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9793 INSNA(11,8) == BITS4(1,1,1,1) &&
9794 INSNA(7,4) == BITS4(1,1,1,1)) {
9795 regD = INSNA(15,12);
9796 regN = INSNA(19,16);
9797 regM = INSNA(3,0);
9798 if (regD != 15 && regN != 15 && regM != 15)
9799 gate = True;
9803 if (gate) {
9804 IRTemp rNt = newTemp(Ity_I32);
9805 IRTemp rMt = newTemp(Ity_I32);
9806 IRTemp res = newTemp(Ity_I32);
9807 IRTemp reso = newTemp(Ity_I32);
9809 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9810 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9812 assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9813 if (isT)
9814 putIRegT( regD, mkexpr(res), condT );
9815 else
9816 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9818 assign(reso, unop(Iop_Not32,
9819 binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
9820 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9822 DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9823 return True;
9825 /* fall through */
9828 /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9830 UInt regD = 99, regN = 99, regM = 99;
9831 Bool gate = False;
9833 if (isT) {
9834 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9835 regN = INSNT0(3,0);
9836 regD = INSNT1(11,8);
9837 regM = INSNT1(3,0);
9838 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9839 gate = True;
9841 } else {
9842 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9843 INSNA(11,8) == BITS4(1,1,1,1) &&
9844 INSNA(7,4) == BITS4(1,0,0,1)) {
9845 regD = INSNA(15,12);
9846 regN = INSNA(19,16);
9847 regM = INSNA(3,0);
9848 if (regD != 15 && regN != 15 && regM != 15)
9849 gate = True;
9853 if (gate) {
9854 IRTemp rNt = newTemp(Ity_I32);
9855 IRTemp rMt = newTemp(Ity_I32);
9856 IRTemp res_q = newTemp(Ity_I32);
9858 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9859 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9861 assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
9862 if (isT)
9863 putIRegT( regD, mkexpr(res_q), condT );
9864 else
9865 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9867 DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9868 return True;
9870 /* fall through */
9873 /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
9875 UInt regD = 99, regN = 99, regM = 99;
9876 Bool gate = False;
9878 if (isT) {
9879 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9880 regN = INSNT0(3,0);
9881 regD = INSNT1(11,8);
9882 regM = INSNT1(3,0);
9883 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9884 gate = True;
9886 } else {
9887 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9888 INSNA(11,8) == BITS4(1,1,1,1) &&
9889 INSNA(7,4) == BITS4(1,1,1,1)) {
9890 regD = INSNA(15,12);
9891 regN = INSNA(19,16);
9892 regM = INSNA(3,0);
9893 if (regD != 15 && regN != 15 && regM != 15)
9894 gate = True;
9898 if (gate) {
9899 IRTemp rNt = newTemp(Ity_I32);
9900 IRTemp rMt = newTemp(Ity_I32);
9901 IRTemp res_q = newTemp(Ity_I32);
9903 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9904 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9906 assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
9907 if (isT)
9908 putIRegT( regD, mkexpr(res_q), condT );
9909 else
9910 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9912 DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9913 return True;
9915 /* fall through */
9918 /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9920 UInt regD = 99, regN = 99, regM = 99;
9921 Bool gate = False;
9923 if (isT) {
9924 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9925 regN = INSNT0(3,0);
9926 regD = INSNT1(11,8);
9927 regM = INSNT1(3,0);
9928 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9929 gate = True;
9931 } else {
9932 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9933 INSNA(11,8) == BITS4(1,1,1,1) &&
9934 (INSNA(7,4) == BITS4(1,0,0,1))) {
9935 regD = INSNA(15,12);
9936 regN = INSNA(19,16);
9937 regM = INSNA(3,0);
9938 if (regD != 15 && regN != 15 && regM != 15)
9939 gate = True;
9943 if (gate) {
9944 IRTemp rNt = newTemp(Ity_I32);
9945 IRTemp rMt = newTemp(Ity_I32);
9946 IRTemp res_q = newTemp(Ity_I32);
9948 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9949 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9951 assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9952 if (isT)
9953 putIRegT( regD, mkexpr(res_q), condT );
9954 else
9955 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9957 DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9958 return True;
9960 /* fall through */
9963 /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9965 UInt regD = 99, regN = 99, regM = 99;
9966 Bool gate = False;
9968 if (isT) {
9969 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9970 regN = INSNT0(3,0);
9971 regD = INSNT1(11,8);
9972 regM = INSNT1(3,0);
9973 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9974 gate = True;
9976 } else {
9977 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9978 INSNA(11,8) == BITS4(1,1,1,1) &&
9979 (INSNA(7,4) == BITS4(1,1,1,1))) {
9980 regD = INSNA(15,12);
9981 regN = INSNA(19,16);
9982 regM = INSNA(3,0);
9983 if (regD != 15 && regN != 15 && regM != 15)
9984 gate = True;
9988 if (gate) {
9989 IRTemp rNt = newTemp(Ity_I32);
9990 IRTemp rMt = newTemp(Ity_I32);
9991 IRTemp res_q = newTemp(Ity_I32);
9993 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9994 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9996 assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
9997 if (isT)
9998 putIRegT( regD, mkexpr(res_q), condT );
9999 else
10000 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10002 DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10003 return True;
10005 /* fall through */
10008 /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10010 UInt regD = 99, regN = 99, regM = 99;
10011 Bool gate = False;
10013 if (isT) {
10014 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
10015 regN = INSNT0(3,0);
10016 regD = INSNT1(11,8);
10017 regM = INSNT1(3,0);
10018 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10019 gate = True;
10021 } else {
10022 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10023 INSNA(11,8) == BITS4(1,1,1,1) &&
10024 INSNA(7,4) == BITS4(1,0,0,1)) {
10025 regD = INSNA(15,12);
10026 regN = INSNA(19,16);
10027 regM = INSNA(3,0);
10028 if (regD != 15 && regN != 15 && regM != 15)
10029 gate = True;
10033 if (gate) {
10034 IRTemp rNt = newTemp(Ity_I32);
10035 IRTemp rMt = newTemp(Ity_I32);
10036 IRTemp res_q = newTemp(Ity_I32);
10038 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10039 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10041 assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
10042 if (isT)
10043 putIRegT( regD, mkexpr(res_q), condT );
10044 else
10045 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10047 DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10048 return True;
10050 /* fall through */
10053 /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
10055 UInt regD = 99, regN = 99, regM = 99;
10056 Bool gate = False;
10058 if (isT) {
10059 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
10060 regN = INSNT0(3,0);
10061 regD = INSNT1(11,8);
10062 regM = INSNT1(3,0);
10063 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10064 gate = True;
10066 } else {
10067 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10068 INSNA(11,8) == BITS4(1,1,1,1) &&
10069 INSNA(7,4) == BITS4(0,0,0,1)) {
10070 regD = INSNA(15,12);
10071 regN = INSNA(19,16);
10072 regM = INSNA(3,0);
10073 if (regD != 15 && regN != 15 && regM != 15)
10074 gate = True;
10078 if (gate) {
10079 IRTemp rNt = newTemp(Ity_I32);
10080 IRTemp rMt = newTemp(Ity_I32);
10081 IRTemp res_q = newTemp(Ity_I32);
10083 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10084 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10086 assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
10087 if (isT)
10088 putIRegT( regD, mkexpr(res_q), condT );
10089 else
10090 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10092 DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10093 return True;
10095 /* fall through */
10098 /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
/* NOTE(review): this extract elides lines (the embedded original line
   numbers jump, e.g. 10119 -> 10123), so closing braces between the
   Thumb (isT) and ARM decode arms are not visible here.  Pattern used
   by every section below: registers start at sentinel 99, and `gate`
   becomes True only when the encoding matches and no banned registers
   (r15 in ARM mode, SP/PC via isBadRegT in Thumb mode) are involved. */
10100 UInt regD = 99, regN = 99, regM = 99;
10101 Bool gate = False;
10103 if (isT) {
10104 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
10105 regN = INSNT0(3,0);
10106 regD = INSNT1(11,8);
10107 regM = INSNT1(3,0);
10108 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10109 gate = True;
10111 } else {
10112 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
10113 INSNA(11,8) == BITS4(1,1,1,1) &&
10114 INSNA(7,4) == BITS4(1,0,0,1)) {
10115 regD = INSNA(15,12);
10116 regN = INSNA(19,16);
10117 regM = INSNA(3,0);
10118 if (regD != 15 && regN != 15 && regM != 15)
10119 gate = True;
10123 if (gate) {
10124 IRTemp rNt = newTemp(Ity_I32);
10125 IRTemp rMt = newTemp(Ity_I32);
10126 IRTemp res_q = newTemp(Ity_I32);
10128 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10129 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
/* SHADD8: four parallel signed byte lanes, each (a+b)>>1 without
   overflow; no GE or Q flags are written in this section. */
10131 assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
10132 if (isT)
10133 putIRegT( regD, mkexpr(res_q), condT );
10134 else
10135 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10137 DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10138 return True;
10140 /* fall through */
10143 /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
10145 UInt regD = 99, regN = 99, regM = 99;
10146 Bool gate = False;
10148 if (isT) {
10149 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10150 regN = INSNT0(3,0);
10151 regD = INSNT1(11,8);
10152 regM = INSNT1(3,0);
10153 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10154 gate = True;
10156 } else {
10157 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10158 INSNA(11,8) == BITS4(1,1,1,1) &&
10159 INSNA(7,4) == BITS4(0,0,0,1)) {
10160 regD = INSNA(15,12);
10161 regN = INSNA(19,16);
10162 regM = INSNA(3,0);
10163 if (regD != 15 && regN != 15 && regM != 15)
10164 gate = True;
10168 if (gate) {
10169 IRTemp rNt = newTemp(Ity_I32);
10170 IRTemp rMt = newTemp(Ity_I32);
10171 IRTemp res_q = newTemp(Ity_I32);
10173 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10174 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
/* QADD16: two parallel signed-saturating 16-bit adds; the saturation
   is inside the IR op, so no explicit Q-flag update is done here. */
10176 assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
10177 if (isT)
10178 putIRegT( regD, mkexpr(res_q), condT );
10179 else
10180 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10182 DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10183 return True;
10185 /* fall through */
10188 /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
10190 UInt regD = 99, regN = 99, regM = 99;
10191 Bool gate = False;
10193 if (isT) {
10194 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10195 regN = INSNT0(3,0);
10196 regD = INSNT1(11,8);
10197 regM = INSNT1(3,0);
10198 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10199 gate = True;
10201 } else {
10202 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10203 INSNA(11,8) == BITS4(1,1,1,1) &&
10204 INSNA(7,4) == BITS4(0,1,1,1)) {
10205 regD = INSNA(15,12);
10206 regN = INSNA(19,16);
10207 regM = INSNA(3,0);
10208 if (regD != 15 && regN != 15 && regM != 15)
10209 gate = True;
10213 if (gate) {
10214 IRTemp rNt = newTemp(Ity_I32);
10215 IRTemp rMt = newTemp(Ity_I32);
10216 IRTemp res_q = newTemp(Ity_I32);
10218 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10219 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
/* QSUB16: two parallel signed-saturating 16-bit subtracts (N - M). */
10221 assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
10222 if (isT)
10223 putIRegT( regD, mkexpr(res_q), condT );
10224 else
10225 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10227 DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10228 return True;
10230 /* fall through */
10233 /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
10234 /* note: the hardware seems to construct the result differently
10235 from wot the manual says. */
10237 UInt regD = 99, regN = 99, regM = 99;
10238 Bool gate = False;
10240 if (isT) {
10241 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10242 regN = INSNT0(3,0);
10243 regD = INSNT1(11,8);
10244 regM = INSNT1(3,0);
10245 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10246 gate = True;
10248 } else {
10249 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10250 INSNA(11,8) == BITS4(1,1,1,1) &&
10251 INSNA(7,4) == BITS4(0,1,0,1)) {
10252 regD = INSNA(15,12);
10253 regN = INSNA(19,16);
10254 regM = INSNA(3,0);
10255 if (regD != 15 && regN != 15 && regM != 15)
10256 gate = True;
10260 if (gate) {
10261 IRTemp irt_regN = newTemp(Ity_I32);
10262 IRTemp irt_regM = newTemp(Ity_I32);
10263 IRTemp irt_sum = newTemp(Ity_I32);
10264 IRTemp irt_diff = newTemp(Ity_I32);
10265 IRTemp irt_sum_res = newTemp(Ity_I32);
10266 IRTemp irt_diff_res = newTemp(Ity_I32);
10268 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10269 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
/* QSAX: diff = N.hi16 - M.lo16 (both sign-extended via the
   Shl/Sar-by-16 trick), saturated to 16 bits. */
10271 assign( irt_diff,
10272 binop( Iop_Sub32,
10273 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10274 binop( Iop_Sar32,
10275 binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10276 mkU8(16) ) ) );
/* Last arg NULL: the did-saturate indication is discarded — this
   section never writes the Q flag. */
10277 armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
/* sum = N.lo16 + M.hi16, saturated to 16 bits. */
10279 assign( irt_sum,
10280 binop( Iop_Add32,
10281 binop( Iop_Sar32,
10282 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10283 mkU8(16) ),
10284 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
10285 armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
/* result = diff in the high halfword, sum in the low halfword. */
10287 IRExpr* ire_result = binop( Iop_Or32,
10288 binop( Iop_Shl32, mkexpr(irt_diff_res),
10289 mkU8(16) ),
10290 binop( Iop_And32, mkexpr(irt_sum_res),
10291 mkU32(0xFFFF)) );
10293 if (isT)
10294 putIRegT( regD, ire_result, condT );
10295 else
10296 putIRegA( regD, ire_result, condT, Ijk_Boring );
10298 DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10299 return True;
10301 /* fall through */
10304 /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10306 UInt regD = 99, regN = 99, regM = 99;
10307 Bool gate = False;
10309 if (isT) {
10310 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10311 regN = INSNT0(3,0);
10312 regD = INSNT1(11,8);
10313 regM = INSNT1(3,0);
10314 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10315 gate = True;
10317 } else {
10318 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10319 INSNA(11,8) == BITS4(1,1,1,1) &&
10320 INSNA(7,4) == BITS4(0,0,1,1)) {
10321 regD = INSNA(15,12);
10322 regN = INSNA(19,16);
10323 regM = INSNA(3,0);
10324 if (regD != 15 && regN != 15 && regM != 15)
10325 gate = True;
10329 if (gate) {
10330 IRTemp irt_regN = newTemp(Ity_I32);
10331 IRTemp irt_regM = newTemp(Ity_I32);
10332 IRTemp irt_sum = newTemp(Ity_I32);
10333 IRTemp irt_diff = newTemp(Ity_I32);
10334 IRTemp irt_res_sum = newTemp(Ity_I32);
10335 IRTemp irt_res_diff = newTemp(Ity_I32);
10337 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10338 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
/* QASX is the mirror of QSAX: diff = N.lo16 - M.hi16 (-> result low
   half) and sum = N.hi16 + M.lo16 (-> result high half), both
   saturated; saturation status again discarded (no Q flag). */
10340 assign( irt_diff,
10341 binop( Iop_Sub32,
10342 binop( Iop_Sar32,
10343 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10344 mkU8(16) ),
10345 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10346 armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
10348 assign( irt_sum,
10349 binop( Iop_Add32,
10350 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10351 binop( Iop_Sar32,
10352 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10353 mkU8(16) ) ) );
10354 armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
10356 IRExpr* ire_result
10357 = binop( Iop_Or32,
10358 binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
10359 binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
10361 if (isT)
10362 putIRegT( regD, ire_result, condT );
10363 else
10364 putIRegA( regD, ire_result, condT, Ijk_Boring );
10366 DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10367 return True;
10369 /* fall through */
10372 /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10374 UInt regD = 99, regN = 99, regM = 99;
10375 Bool gate = False;
10377 if (isT) {
10378 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
10379 regN = INSNT0(3,0);
10380 regD = INSNT1(11,8);
10381 regM = INSNT1(3,0);
10382 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10383 gate = True;
10385 } else {
10386 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
10387 INSNA(11,8) == BITS4(1,1,1,1) &&
10388 INSNA(7,4) == BITS4(0,0,1,1)) {
10389 regD = INSNA(15,12);
10390 regN = INSNA(19,16);
10391 regM = INSNA(3,0);
10392 if (regD != 15 && regN != 15 && regM != 15)
10393 gate = True;
10397 if (gate) {
10398 IRTemp irt_regN = newTemp(Ity_I32);
10399 IRTemp irt_regM = newTemp(Ity_I32);
10400 IRTemp irt_sum = newTemp(Ity_I32);
10401 IRTemp irt_diff = newTemp(Ity_I32);
10403 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10404 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
/* SASX: like QASX but without saturation... */
10406 assign( irt_diff,
10407 binop( Iop_Sub32,
10408 binop( Iop_Sar32,
10409 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10410 mkU8(16) ),
10411 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10413 assign( irt_sum,
10414 binop( Iop_Add32,
10415 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10416 binop( Iop_Sar32,
10417 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10418 mkU8(16) ) ) );
10420 IRExpr* ire_result
10421 = binop( Iop_Or32,
10422 binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
10423 binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
/* ... and it writes the GE flags instead: each put_GEFLAG32 takes
   bit 31 of its argument, so GE[1:0] = (diff >= 0) via ~diff, and
   GE[3:2] = (sum >= 0) via ~sum. */
10425 IRTemp ge10 = newTemp(Ity_I32);
10426 assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
10427 put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
10428 put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
10430 IRTemp ge32 = newTemp(Ity_I32);
10431 assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
10432 put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
10433 put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
10435 if (isT)
10436 putIRegT( regD, ire_result, condT );
10437 else
10438 putIRegA( regD, ire_result, condT, Ijk_Boring );
10440 DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10441 return True;
10443 /* fall through */
10446 /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
10447 /* --------------- smsad, smsadx<c><Rd>,<Rn>,<Rm> --------------- */
10449 UInt regD = 99, regN = 99, regM = 99, bitM = 99;
10450 Bool gate = False, isAD = False;
10452 if (isT) {
10453 if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10454 && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
10455 regN = INSNT0(3,0);
10456 regD = INSNT1(11,8);
10457 regM = INSNT1(3,0);
10458 bitM = INSNT1(4,4);
/* 0xFB2 encodes the dual-add (smuad) form, 0xFB4 the dual-subtract. */
10459 isAD = INSNT0(15,4) == 0xFB2;
10460 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10461 gate = True;
10463 } else {
10464 if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10465 INSNA(15,12) == BITS4(1,1,1,1) &&
10466 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
10467 regD = INSNA(19,16);
10468 regN = INSNA(3,0);
10469 regM = INSNA(11,8);
10470 bitM = INSNA(5,5);
10471 isAD = INSNA(6,6) == 0;
10472 if (regD != 15 && regN != 15 && regM != 15)
10473 gate = True;
10477 if (gate) {
10478 IRTemp irt_regN = newTemp(Ity_I32);
10479 IRTemp irt_regM = newTemp(Ity_I32);
10480 IRTemp irt_prod_lo = newTemp(Ity_I32);
10481 IRTemp irt_prod_hi = newTemp(Ity_I32);
10482 IRTemp tmpM = newTemp(Ity_I32);
10484 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10486 assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
/* The 'x' variant (bitM set) swaps Rm's halfwords before use. */
10487 assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
/* prod_lo = N.lo16 * M.lo16, prod_hi = N.hi16 * M.hi16, both as
   sign-extended 16-bit operands. */
10489 assign( irt_prod_lo,
10490 binop( Iop_Mul32,
10491 binop( Iop_Sar32,
10492 binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
10493 mkU8(16) ),
10494 binop( Iop_Sar32,
10495 binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10496 mkU8(16) ) ) );
10497 assign( irt_prod_hi, binop(Iop_Mul32,
10498 binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
10499 binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
10500 IRExpr* ire_result
10501 = binop( isAD ? Iop_Add32 : Iop_Sub32,
10502 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
10504 if (isT)
10505 putIRegT( regD, ire_result, condT );
10506 else
10507 putIRegA( regD, ire_result, condT, Ijk_Boring );
/* Only the add form can overflow 32 bits, hence the Q-flag update is
   done only when isAD. */
10509 if (isAD) {
10510 or_into_QFLAG32(
10511 signed_overflow_after_Add32( ire_result,
10512 irt_prod_lo, irt_prod_hi ),
10513 condT
10517 DIP("smu%cd%s%s r%u, r%u, r%u\n",
10518 isAD ? 'a' : 's',
10519 bitM ? "x" : "", nCC(conq), regD, regN, regM);
10520 return True;
10522 /* fall through */
10525 /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10526 /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10528 UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10529 Bool gate = False, isAD = False;
10531 if (isT) {
10532 if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10533 && INSNT1(7,5) == BITS3(0,0,0)) {
10534 regN = INSNT0(3,0);
10535 regD = INSNT1(11,8);
10536 regM = INSNT1(3,0);
10537 regA = INSNT1(15,12);
10538 bitM = INSNT1(4,4);
10539 isAD = INSNT0(15,4) == 0xFB2;
10540 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10541 && !isBadRegT(regA))
10542 gate = True;
10544 } else {
10545 if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10546 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
10547 regD = INSNA(19,16);
10548 regA = INSNA(15,12);
10549 regN = INSNA(3,0);
10550 regM = INSNA(11,8);
10551 bitM = INSNA(5,5);
10552 isAD = INSNA(6,6) == 0;
10553 if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10554 gate = True;
10558 if (gate) {
10559 IRTemp irt_regN = newTemp(Ity_I32);
10560 IRTemp irt_regM = newTemp(Ity_I32);
10561 IRTemp irt_regA = newTemp(Ity_I32);
10562 IRTemp irt_prod_lo = newTemp(Ity_I32);
10563 IRTemp irt_prod_hi = newTemp(Ity_I32);
10564 IRTemp irt_sum = newTemp(Ity_I32);
10565 IRTemp tmpM = newTemp(Ity_I32);
10567 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10568 assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10570 assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10571 assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
/* Same dual 16x16 products as smuad/smusd above, then accumulated
   into Ra. */
10573 assign( irt_prod_lo,
10574 binop(Iop_Mul32,
10575 binop(Iop_Sar32,
10576 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10577 mkU8(16)),
10578 binop(Iop_Sar32,
10579 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10580 mkU8(16))) );
10581 assign( irt_prod_hi,
10582 binop( Iop_Mul32,
10583 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10584 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10585 assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
10586 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
10588 IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
10590 if (isT)
10591 putIRegT( regD, ire_result, condT );
10592 else
10593 putIRegA( regD, ire_result, condT, Ijk_Boring );
/* Q is set if the dual-product combine overflowed (add form only) ... */
10595 if (isAD) {
10596 or_into_QFLAG32(
10597 signed_overflow_after_Add32( mkexpr(irt_sum),
10598 irt_prod_lo, irt_prod_hi ),
10599 condT
/* ... and also if the final accumulate overflowed (both forms). */
10603 or_into_QFLAG32(
10604 signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
10605 condT
10608 DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
10609 isAD ? 'a' : 's',
10610 bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
10611 return True;
10613 /* fall through */
10616 /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10618 UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
10619 Bool gate = False;
10621 if (isT) {
10622 if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
10623 regN = INSNT0(3,0);
10624 regD = INSNT1(11,8);
10625 regM = INSNT1(3,0);
10626 regA = INSNT1(15,12);
10627 bitM = INSNT1(4,4);
10628 bitN = INSNT1(5,5);
10629 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10630 && !isBadRegT(regA))
10631 gate = True;
10633 } else {
10634 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10635 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10636 regD = INSNA(19,16);
10637 regN = INSNA(3,0);
10638 regM = INSNA(11,8);
10639 regA = INSNA(15,12);
10640 bitM = INSNA(6,6);
10641 bitN = INSNA(5,5);
10642 if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10643 gate = True;
10647 if (gate) {
10648 IRTemp irt_regA = newTemp(Ity_I32);
10649 IRTemp irt_prod = newTemp(Ity_I32);
/* Halfword selection: shifting left by (bitX ? 0 : 16) then
   arithmetic-right by 16 yields the sign-extended top half when the
   bit is set, the bottom half otherwise. */
10651 assign( irt_prod,
10652 binop(Iop_Mul32,
10653 binop(Iop_Sar32,
10654 binop(Iop_Shl32,
10655 isT ? getIRegT(regN) : getIRegA(regN),
10656 mkU8(bitN ? 0 : 16)),
10657 mkU8(16)),
10658 binop(Iop_Sar32,
10659 binop(Iop_Shl32,
10660 isT ? getIRegT(regM) : getIRegA(regM),
10661 mkU8(bitM ? 0 : 16)),
10662 mkU8(16))) );
10664 assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10666 IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
10668 if (isT)
10669 putIRegT( regD, ire_result, condT );
10670 else
10671 putIRegA( regD, ire_result, condT, Ijk_Boring );
/* Q is set if the accumulate step overflowed (the 16x16 product
   itself cannot overflow 32 bits). */
10673 or_into_QFLAG32(
10674 signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
10675 condT
10678 DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
10679 bitN ? 't' : 'b', bitM ? 't' : 'b',
10680 nCC(conq), regD, regN, regM, regA );
10681 return True;
10683 /* fall through */
10686 /* ----- smlalbb, smlalbt, smlaltb, smlaltt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10688 UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
10689 Bool gate = False;
10691 if (isT) {
10692 if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
10693 regN = INSNT0(3,0);
10694 regDHi = INSNT1(11,8);
10695 regM = INSNT1(3,0);
10696 regDLo = INSNT1(15,12);
10697 bitM = INSNT1(4,4);
10698 bitN = INSNT1(5,5);
/* regDHi == regDLo is UNPREDICTABLE, hence rejected. */
10699 if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
10700 && !isBadRegT(regDLo) && regDHi != regDLo)
10701 gate = True;
10703 } else {
10704 if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
10705 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10706 regDHi = INSNA(19,16);
10707 regN = INSNA(3,0);
10708 regM = INSNA(11,8);
10709 regDLo = INSNA(15,12);
10710 bitM = INSNA(6,6);
10711 bitN = INSNA(5,5);
10712 if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
10713 regDHi != regDLo)
10714 gate = True;
10718 if (gate) {
10719 IRTemp irt_regD = newTemp(Ity_I64);
10720 IRTemp irt_prod = newTemp(Ity_I64);
10721 IRTemp irt_res = newTemp(Ity_I64);
10722 IRTemp irt_resHi = newTemp(Ity_I32);
10723 IRTemp irt_resLo = newTemp(Ity_I32);
/* 64-bit signed product of the selected halfwords (same Shl/Sar
   selection trick as smlaXY above). */
10725 assign( irt_prod,
10726 binop(Iop_MullS32,
10727 binop(Iop_Sar32,
10728 binop(Iop_Shl32,
10729 isT ? getIRegT(regN) : getIRegA(regN),
10730 mkU8(bitN ? 0 : 16)),
10731 mkU8(16)),
10732 binop(Iop_Sar32,
10733 binop(Iop_Shl32,
10734 isT ? getIRegT(regM) : getIRegA(regM),
10735 mkU8(bitM ? 0 : 16)),
10736 mkU8(16))) );
/* Accumulate into the RdHi:RdLo 64-bit pair; no Q flag is written
   in this section. */
10738 assign( irt_regD, binop(Iop_32HLto64,
10739 isT ? getIRegT(regDHi) : getIRegA(regDHi),
10740 isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
10741 assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
10742 assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
10743 assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
10745 if (isT) {
10746 putIRegT( regDHi, mkexpr(irt_resHi), condT );
10747 putIRegT( regDLo, mkexpr(irt_resLo), condT );
10748 } else {
10749 putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
10750 putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
10753 DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
10754 bitN ? 't' : 'b', bitM ? 't' : 'b',
10755 nCC(conq), regDHi, regN, regM, regDLo );
10756 return True;
10758 /* fall through */
10761 /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10763 UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10764 Bool gate = False;
10766 if (isT) {
10767 if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
10768 regN = INSNT0(3,0);
10769 regD = INSNT1(11,8);
10770 regM = INSNT1(3,0);
10771 regA = INSNT1(15,12);
10772 bitM = INSNT1(4,4);
10773 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10774 && !isBadRegT(regA))
10775 gate = True;
10777 } else {
10778 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
10779 (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
10780 regD = INSNA(19,16);
10781 regN = INSNA(3,0);
10782 regM = INSNA(11,8);
10783 regA = INSNA(15,12);
10784 bitM = INSNA(6,6);
10785 if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10786 gate = True;
10790 if (gate) {
10791 IRTemp irt_regA = newTemp(Ity_I32);
10792 IRTemp irt_prod = newTemp(Ity_I64);
/* 48-significant-bit product: full 32-bit Rn times the selected
   sign-extended halfword of Rm. */
10794 assign( irt_prod,
10795 binop(Iop_MullS32,
10796 isT ? getIRegT(regN) : getIRegA(regN),
10797 binop(Iop_Sar32,
10798 binop(Iop_Shl32,
10799 isT ? getIRegT(regM) : getIRegA(regM),
10800 mkU8(bitM ? 0 : 16)),
10801 mkU8(16))) );
10803 assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
/* prod32 = bits [47:16] of the 64-bit product, reassembled from the
   high and low 32-bit halves. */
10805 IRTemp prod32 = newTemp(Ity_I32);
10806 assign(prod32,
10807 binop(Iop_Or32,
10808 binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
10809 binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
10812 IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
10814 if (isT)
10815 putIRegT( regD, ire_result, condT );
10816 else
10817 putIRegA( regD, ire_result, condT, Ijk_Boring );
/* Q is set only on overflow of the final accumulate. */
10819 or_into_QFLAG32(
10820 signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
10821 condT
10824 DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
10825 bitM ? 't' : 'b',
10826 nCC(conq), regD, regN, regM, regA );
10827 return True;
10829 /* fall through */
10832 /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
10833 /* fixme: fix up the test in v6media.c so that we can pass the ge
10834 flags as part of the test. */
10836 UInt regD = 99, regN = 99, regM = 99;
10837 Bool gate = False;
10839 if (isT) {
10840 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10841 regN = INSNT0(3,0);
10842 regD = INSNT1(11,8);
10843 regM = INSNT1(3,0);
10844 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10845 gate = True;
10847 } else {
10848 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
10849 INSNA(11,8) == BITS4(1,1,1,1) &&
10850 INSNA(7,4) == BITS4(1,0,1,1)) {
10851 regD = INSNA(15,12);
10852 regN = INSNA(19,16);
10853 regM = INSNA(3,0);
10854 if (regD != 15 && regN != 15 && regM != 15)
10855 gate = True;
10859 if (gate) {
10860 IRTemp irt_ge_flag0 = newTemp(Ity_I32);
10861 IRTemp irt_ge_flag1 = newTemp(Ity_I32);
10862 IRTemp irt_ge_flag2 = newTemp(Ity_I32);
10863 IRTemp irt_ge_flag3 = newTemp(Ity_I32);
10865 assign( irt_ge_flag0, get_GEFLAG32(0) );
10866 assign( irt_ge_flag1, get_GEFLAG32(1) );
10867 assign( irt_ge_flag2, get_GEFLAG32(2) );
10868 assign( irt_ge_flag3, get_GEFLAG32(3) );
/* (g | -g) has bit 31 set iff g is nonzero; Sar32 by 31 then smears
   that into an all-ones / all-zeros lane mask per GE flag. */
10870 IRExpr* ire_ge_flag0_or
10871 = binop(Iop_Or32, mkexpr(irt_ge_flag0),
10872 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
10873 IRExpr* ire_ge_flag1_or
10874 = binop(Iop_Or32, mkexpr(irt_ge_flag1),
10875 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
10876 IRExpr* ire_ge_flag2_or
10877 = binop(Iop_Or32, mkexpr(irt_ge_flag2),
10878 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
10879 IRExpr* ire_ge_flag3_or
10880 = binop(Iop_Or32, mkexpr(irt_ge_flag3),
10881 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
/* Byte-lane mask: byte k is 0xFF when GE[k] is set, 0x00 otherwise. */
10883 IRExpr* ire_ge_flags
10884 = binop( Iop_Or32,
10885 binop(Iop_Or32,
10886 binop(Iop_And32,
10887 binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
10888 mkU32(0x000000ff)),
10889 binop(Iop_And32,
10890 binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
10891 mkU32(0x0000ff00))),
10892 binop(Iop_Or32,
10893 binop(Iop_And32,
10894 binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
10895 mkU32(0x00ff0000)),
10896 binop(Iop_And32,
10897 binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
10898 mkU32(0xff000000))) );
/* SEL: pick each result byte from Rn where the mask is set, else
   from Rm. */
10900 IRExpr* ire_result
10901 = binop(Iop_Or32,
10902 binop(Iop_And32,
10903 isT ? getIRegT(regN) : getIRegA(regN),
10904 ire_ge_flags ),
10905 binop(Iop_And32,
10906 isT ? getIRegT(regM) : getIRegA(regM),
10907 unop(Iop_Not32, ire_ge_flags)));
10909 if (isT)
10910 putIRegT( regD, ire_result, condT );
10911 else
10912 putIRegA( regD, ire_result, condT, Ijk_Boring );
10914 DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10915 return True;
10917 /* fall through */
10920 /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
10922 UInt regD = 99, regN = 99, regM = 99, rotate = 99;
10923 Bool gate = False;
10925 if (isT) {
10926 if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
10927 regN = INSNT0(3,0);
10928 regD = INSNT1(11,8);
10929 regM = INSNT1(3,0);
10930 rotate = INSNT1(5,4);
10931 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10932 gate = True;
10934 } else {
10935 if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
10936 INSNA(9,4) == BITS6(0,0,0,1,1,1) ) {
10937 regD = INSNA(15,12);
10938 regN = INSNA(19,16);
10939 regM = INSNA(3,0);
10940 rotate = INSNA(11,10);
10941 if (regD != 15 && regN != 15 && regM != 15)
10942 gate = True;
10946 if (gate) {
10947 IRTemp irt_regN = newTemp(Ity_I32);
10948 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10950 IRTemp irt_regM = newTemp(Ity_I32);
10951 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
/* Rotate Rm by 0/8/16/24, then keep bytes 0 and 2 zero-extended to
   halfwords (the 0x00FF00FF mask). */
10953 IRTemp irt_rot = newTemp(Ity_I32);
10954 assign( irt_rot, binop(Iop_And32,
10955 genROR32(irt_regM, 8 * rotate),
10956 mkU32(0x00FF00FF)) );
/* Independent halfword adds: low half masked after the add, high
   half added with masked operands so no carry crosses bit 16. */
10958 IRExpr* resLo
10959 = binop(Iop_And32,
10960 binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
10961 mkU32(0x0000FFFF));
10963 IRExpr* resHi
10964 = binop(Iop_Add32,
10965 binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
10966 binop(Iop_And32, mkexpr(irt_rot), mkU32(0xFFFF0000)));
10968 IRExpr* ire_result
10969 = binop( Iop_Or32, resHi, resLo );
10971 if (isT)
10972 putIRegT( regD, ire_result, condT );
10973 else
10974 putIRegA( regD, ire_result, condT, Ijk_Boring );
10976 DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
10977 nCC(conq), regD, regN, regM, 8 * rotate );
10978 return True;
10980 /* fall through */
10983 /* --------------- usad8 Rd,Rn,Rm ---------------- */
10984 /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
10986 UInt rD = 99, rN = 99, rM = 99, rA = 99;
10987 Bool gate = False;
10989 if (isT) {
10990 if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
10991 rN = INSNT0(3,0);
10992 rA = INSNT1(15,12);
10993 rD = INSNT1(11,8);
10994 rM = INSNT1(3,0);
/* Only rA == 13 is rejected here: rA == 15 is the usad8 encoding. */
10995 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
10996 gate = True;
10998 } else {
10999 if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
11000 INSNA(7,4) == BITS4(0,0,0,1) ) {
11001 rD = INSNA(19,16);
11002 rA = INSNA(15,12);
11003 rM = INSNA(11,8);
11004 rN = INSNA(3,0);
11005 if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
11006 gate = True;
11009 /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
11011 if (gate) {
11012 IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
11013 IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
/* rA == 15 selects a zero accumulator, i.e. plain usad8. */
11014 IRExpr* rAe = rA == 15 ? mkU32(0)
11015 : (isT ? getIRegT(rA) : getIRegA(rA));
/* Sum of absolute byte differences, plus the accumulator. */
11016 IRExpr* res = binop(Iop_Add32,
11017 binop(Iop_Sad8Ux4, rNe, rMe),
11018 rAe);
11019 if (isT)
11020 putIRegT( rD, res, condT );
11021 else
11022 putIRegA( rD, res, condT, Ijk_Boring );
11024 if (rA == 15) {
11025 DIP( "usad8%s r%u, r%u, r%u\n",
11026 nCC(conq), rD, rN, rM );
11027 } else {
11028 DIP( "usada8%s r%u, r%u, r%u, r%u\n",
11029 nCC(conq), rD, rN, rM, rA );
11031 return True;
11033 /* fall through */
11036 /* ------------------ qadd<c> <Rd>,<Rn>,<Rm> ------------------- */
11038 UInt regD = 99, regN = 99, regM = 99;
11039 Bool gate = False;
11041 if (isT) {
11042 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
11043 regN = INSNT0(3,0);
11044 regD = INSNT1(11,8);
11045 regM = INSNT1(3,0);
11046 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11047 gate = True;
11049 } else {
11050 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
11051 INSNA(11,8) == BITS4(0,0,0,0) &&
11052 INSNA(7,4) == BITS4(0,1,0,1)) {
11053 regD = INSNA(15,12);
11054 regN = INSNA(19,16);
11055 regM = INSNA(3,0);
11056 if (regD != 15 && regN != 15 && regM != 15)
11057 gate = True;
11061 if (gate) {
11062 IRTemp rNt = newTemp(Ity_I32);
11063 IRTemp rMt = newTemp(Ity_I32);
11064 IRTemp res_q = newTemp(Ity_I32);
11066 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11067 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
/* QADD: 32-bit signed saturating add. */
11069 assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
11070 if (isT)
11071 putIRegT( regD, mkexpr(res_q), condT );
11072 else
11073 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
/* The Q flag is derived from whether the *unsaturated* add would
   have overflowed. */
11075 or_into_QFLAG32(
11076 signed_overflow_after_Add32(
11077 binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11078 condT
11081 DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11082 return True;
11084 /* fall through */
11087 /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
11089 UInt regD = 99, regN = 99, regM = 99;
11090 Bool gate = False;
11092 if (isT) {
11093 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
11094 regN = INSNT0(3,0);
11095 regD = INSNT1(11,8);
11096 regM = INSNT1(3,0);
11097 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11098 gate = True;
11100 } else {
11101 if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
11102 INSNA(11,8) == BITS4(0,0,0,0) &&
11103 INSNA(7,4) == BITS4(0,1,0,1)) {
11104 regD = INSNA(15,12);
11105 regN = INSNA(19,16);
11106 regM = INSNA(3,0);
11107 if (regD != 15 && regN != 15 && regM != 15)
11108 gate = True;
11112 if (gate) {
11113 IRTemp rNt = newTemp(Ity_I32);
11114 IRTemp rMt = newTemp(Ity_I32);
11115 IRTemp rN_d = newTemp(Ity_I32);
11116 IRTemp res_q = newTemp(Ity_I32);
11118 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11119 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
/* QDADD = Rm + sat(2*Rn): Q can be set by either the saturating
   doubling of Rn (checked here) ... */
11121 or_into_QFLAG32(
11122 signed_overflow_after_Add32(
11123 binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11124 condT
11127 assign(rN_d, binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11128 assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
11129 if (isT)
11130 putIRegT( regD, mkexpr(res_q), condT );
11131 else
11132 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
/* ... or by the final saturating add (checked here). */
11134 or_into_QFLAG32(
11135 signed_overflow_after_Add32(
11136 binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11137 condT
11140 DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11141 return True;
11143 /* fall through */
11146 /* ------------------ qsub<c> <Rd>,<Rn>,<Rm> ------------------- */
11148 UInt regD = 99, regN = 99, regM = 99;
11149 Bool gate = False;
11151 if (isT) {
11152 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
11153 regN = INSNT0(3,0);
11154 regD = INSNT1(11,8);
11155 regM = INSNT1(3,0);
11156 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11157 gate = True;
11159 } else {
11160 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
11161 INSNA(11,8) == BITS4(0,0,0,0) &&
11162 INSNA(7,4) == BITS4(0,1,0,1)) {
11163 regD = INSNA(15,12);
11164 regN = INSNA(19,16);
11165 regM = INSNA(3,0);
11166 if (regD != 15 && regN != 15 && regM != 15)
11167 gate = True;
11171 if (gate) {
11172 IRTemp rNt = newTemp(Ity_I32);
11173 IRTemp rMt = newTemp(Ity_I32);
11174 IRTemp res_q = newTemp(Ity_I32);
11176 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11177 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
/* QSUB: 32-bit signed saturating subtract, Rm - Rn. */
11179 assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
11180 if (isT)
11181 putIRegT( regD, mkexpr(res_q), condT );
11182 else
11183 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
/* Q set iff the unsaturated subtract would have overflowed. */
11185 or_into_QFLAG32(
11186 signed_overflow_after_Sub32(
11187 binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11188 condT
11191 DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11192 return True;
11194 /* fall through */
11197 /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
11199 UInt regD = 99, regN = 99, regM = 99;
11200 Bool gate = False;
11202 if (isT) {
11203 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
11204 regN = INSNT0(3,0);
11205 regD = INSNT1(11,8);
11206 regM = INSNT1(3,0);
11207 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11208 gate = True;
11210 } else {
11211 if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
11212 INSNA(11,8) == BITS4(0,0,0,0) &&
11213 INSNA(7,4) == BITS4(0,1,0,1)) {
11214 regD = INSNA(15,12);
11215 regN = INSNA(19,16);
11216 regM = INSNA(3,0);
11217 if (regD != 15 && regN != 15 && regM != 15)
11218 gate = True;
11222 if (gate) {
11223 IRTemp rNt = newTemp(Ity_I32);
11224 IRTemp rMt = newTemp(Ity_I32);
11225 IRTemp rN_d = newTemp(Ity_I32);
11226 IRTemp res_q = newTemp(Ity_I32);
11228 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11229 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
/* QDSUB = Rm - sat(2*Rn): Q may come from the saturating doubling
   of Rn ... */
11231 or_into_QFLAG32(
11232 signed_overflow_after_Add32(
11233 binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11234 condT
11237 assign(rN_d, binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11238 assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
11239 if (isT)
11240 putIRegT( regD, mkexpr(res_q), condT );
11241 else
11242 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
/* ... or from the final saturating subtract. */
11244 or_into_QFLAG32(
11245 signed_overflow_after_Sub32(
11246 binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11247 condT
11250 DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11251 return True;
11253 /* fall through */
11256 /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
11258 UInt regD = 99, regN = 99, regM = 99;
11259 Bool gate = False;
11261 if (isT) {
11262 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11263 regN = INSNT0(3,0);
11264 regD = INSNT1(11,8);
11265 regM = INSNT1(3,0);
11266 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11267 gate = True;
11269 } else {
11270 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11271 INSNA(11,8) == BITS4(1,1,1,1) &&
11272 INSNA(7,4) == BITS4(0,1,1,1)) {
11273 regD = INSNA(15,12);
11274 regN = INSNA(19,16);
11275 regM = INSNA(3,0);
11276 if (regD != 15 && regN != 15 && regM != 15)
11277 gate = True;
11281 if (gate) {
11282 IRTemp rNt = newTemp(Ity_I32);
11283 IRTemp rMt = newTemp(Ity_I32);
11284 IRTemp res_q = newTemp(Ity_I32);
11286 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11287 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
/* UQSUB16: two parallel unsigned-saturating 16-bit subtracts; no
   flags are written in this section. */
11289 assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11290 if (isT)
11291 putIRegT( regD, mkexpr(res_q), condT );
11292 else
11293 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11295 DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11296 return True;
11298 /* fall through */
11301 /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
11303 UInt regD = 99, regN = 99, regM = 99;
11304 Bool gate = False;
11306 if (isT) {
11307 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11308 regN = INSNT0(3,0);
11309 regD = INSNT1(11,8);
11310 regM = INSNT1(3,0);
11311 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11312 gate = True;
11314 } else {
11315 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11316 INSNA(11,8) == BITS4(1,1,1,1) &&
11317 INSNA(7,4) == BITS4(0,0,0,1)) {
11318 regD = INSNA(15,12);
11319 regN = INSNA(19,16);
11320 regM = INSNA(3,0);
11321 if (regD != 15 && regN != 15 && regM != 15)
11322 gate = True;
11326 if (gate) {
11327 IRTemp rNt = newTemp(Ity_I32);
11328 IRTemp rMt = newTemp(Ity_I32);
11329 IRTemp res_q = newTemp(Ity_I32);
11331 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11332 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11334 assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
11335 if (isT)
11336 putIRegT( regD, mkexpr(res_q), condT );
11337 else
11338 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11340 DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11341 return True;
11343 /* fall through */
11346 /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11348 UInt regD = 99, regN = 99, regM = 99;
11349 Bool gate = False;
11351 if (isT) {
11352 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11353 regN = INSNT0(3,0);
11354 regD = INSNT1(11,8);
11355 regM = INSNT1(3,0);
11356 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11357 gate = True;
11359 } else {
11360 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11361 INSNA(11,8) == BITS4(1,1,1,1) &&
11362 INSNA(7,4) == BITS4(1,1,1,1)) {
11363 regD = INSNA(15,12);
11364 regN = INSNA(19,16);
11365 regM = INSNA(3,0);
11366 if (regD != 15 && regN != 15 && regM != 15)
11367 gate = True;
11371 if (gate) {
11372 IRTemp rNt = newTemp(Ity_I32);
11373 IRTemp rMt = newTemp(Ity_I32);
11374 IRTemp res_q = newTemp(Ity_I32);
11376 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11377 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11379 assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
11380 if (isT)
11381 putIRegT( regD, mkexpr(res_q), condT );
11382 else
11383 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11385 DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11386 return True;
11388 /* fall through */
11391 /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
11393 UInt regD = 99, regN = 99, regM = 99;
11394 Bool gate = False;
11396 if (isT) {
11397 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11398 regN = INSNT0(3,0);
11399 regD = INSNT1(11,8);
11400 regM = INSNT1(3,0);
11401 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11402 gate = True;
11404 } else {
11405 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11406 INSNA(11,8) == BITS4(1,1,1,1) &&
11407 INSNA(7,4) == BITS4(0,1,1,1)) {
11408 regD = INSNA(15,12);
11409 regN = INSNA(19,16);
11410 regM = INSNA(3,0);
11411 if (regD != 15 && regN != 15 && regM != 15)
11412 gate = True;
11416 if (gate) {
11417 IRTemp rNt = newTemp(Ity_I32);
11418 IRTemp rMt = newTemp(Ity_I32);
11419 IRTemp res_q = newTemp(Ity_I32);
11421 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11422 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11424 assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11425 if (isT)
11426 putIRegT( regD, mkexpr(res_q), condT );
11427 else
11428 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11430 DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11431 return True;
11433 /* fall through */
11436 /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
11438 UInt regD = 99, regN = 99, regM = 99;
11439 Bool gate = False;
11441 if (isT) {
11442 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11443 regN = INSNT0(3,0);
11444 regD = INSNT1(11,8);
11445 regM = INSNT1(3,0);
11446 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11447 gate = True;
11449 } else {
11450 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11451 INSNA(11,8) == BITS4(1,1,1,1) &&
11452 INSNA(7,4) == BITS4(0,0,0,1)) {
11453 regD = INSNA(15,12);
11454 regN = INSNA(19,16);
11455 regM = INSNA(3,0);
11456 if (regD != 15 && regN != 15 && regM != 15)
11457 gate = True;
11461 if (gate) {
11462 IRTemp rNt = newTemp(Ity_I32);
11463 IRTemp rMt = newTemp(Ity_I32);
11464 IRTemp res_q = newTemp(Ity_I32);
11466 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11467 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11469 assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
11470 if (isT)
11471 putIRegT( regD, mkexpr(res_q), condT );
11472 else
11473 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11475 DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11476 return True;
11478 /* fall through */
11481 /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
11483 UInt regD = 99, regN = 99, regM = 99;
11484 Bool gate = False;
11486 if (isT) {
11487 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11488 regN = INSNT0(3,0);
11489 regD = INSNT1(11,8);
11490 regM = INSNT1(3,0);
11491 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11492 gate = True;
11494 } else {
11495 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11496 INSNA(11,8) == BITS4(1,1,1,1) &&
11497 INSNA(7,4) == BITS4(0,1,0,1)) {
11498 regD = INSNA(15,12);
11499 regN = INSNA(19,16);
11500 regM = INSNA(3,0);
11501 if (regD != 15 && regN != 15 && regM != 15)
11502 gate = True;
11506 if (gate) {
11507 IRTemp irt_regN = newTemp(Ity_I32);
11508 IRTemp irt_regM = newTemp(Ity_I32);
11509 IRTemp irt_sum = newTemp(Ity_I32);
11510 IRTemp irt_diff = newTemp(Ity_I32);
11511 IRTemp irt_sum_res = newTemp(Ity_I32);
11512 IRTemp irt_diff_res = newTemp(Ity_I32);
11514 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11515 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11517 assign( irt_diff,
11518 binop( Iop_Sub32,
11519 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11520 binop( Iop_Shr32,
11521 binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
11522 mkU8(16) ) ) );
11523 armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
11525 assign( irt_sum,
11526 binop( Iop_Add32,
11527 binop( Iop_Shr32,
11528 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11529 mkU8(16) ),
11530 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
11531 armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
11533 IRExpr* ire_result = binop( Iop_Or32,
11534 binop( Iop_Shl32, mkexpr(irt_diff_res),
11535 mkU8(16) ),
11536 binop( Iop_And32, mkexpr(irt_sum_res),
11537 mkU32(0xFFFF)) );
11539 if (isT)
11540 putIRegT( regD, ire_result, condT );
11541 else
11542 putIRegA( regD, ire_result, condT, Ijk_Boring );
11544 DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11545 return True;
11547 /* fall through */
11550 /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11552 UInt regD = 99, regN = 99, regM = 99;
11553 Bool gate = False;
11555 if (isT) {
11556 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11557 regN = INSNT0(3,0);
11558 regD = INSNT1(11,8);
11559 regM = INSNT1(3,0);
11560 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11561 gate = True;
11563 } else {
11564 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11565 INSNA(11,8) == BITS4(1,1,1,1) &&
11566 INSNA(7,4) == BITS4(0,0,1,1)) {
11567 regD = INSNA(15,12);
11568 regN = INSNA(19,16);
11569 regM = INSNA(3,0);
11570 if (regD != 15 && regN != 15 && regM != 15)
11571 gate = True;
11575 if (gate) {
11576 IRTemp irt_regN = newTemp(Ity_I32);
11577 IRTemp irt_regM = newTemp(Ity_I32);
11578 IRTemp irt_sum = newTemp(Ity_I32);
11579 IRTemp irt_diff = newTemp(Ity_I32);
11580 IRTemp irt_res_sum = newTemp(Ity_I32);
11581 IRTemp irt_res_diff = newTemp(Ity_I32);
11583 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11584 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11586 assign( irt_diff,
11587 binop( Iop_Sub32,
11588 binop( Iop_Shr32,
11589 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11590 mkU8(16) ),
11591 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11592 armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
11594 assign( irt_sum,
11595 binop( Iop_Add32,
11596 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11597 binop( Iop_Shr32,
11598 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11599 mkU8(16) ) ) );
11600 armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
11602 IRExpr* ire_result
11603 = binop( Iop_Or32,
11604 binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
11605 binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
11607 if (isT)
11608 putIRegT( regD, ire_result, condT );
11609 else
11610 putIRegA( regD, ire_result, condT, Ijk_Boring );
11612 DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11613 return True;
11615 /* fall through */
11618 /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
11620 UInt regD = 99, regN = 99, regM = 99;
11621 Bool gate = False;
11623 if (isT) {
11624 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11625 regN = INSNT0(3,0);
11626 regD = INSNT1(11,8);
11627 regM = INSNT1(3,0);
11628 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11629 gate = True;
11631 } else {
11632 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11633 INSNA(11,8) == BITS4(1,1,1,1) &&
11634 INSNA(7,4) == BITS4(0,1,0,1)) {
11635 regD = INSNA(15,12);
11636 regN = INSNA(19,16);
11637 regM = INSNA(3,0);
11638 if (regD != 15 && regN != 15 && regM != 15)
11639 gate = True;
11643 if (gate) {
11644 IRTemp irt_regN = newTemp(Ity_I32);
11645 IRTemp irt_regM = newTemp(Ity_I32);
11646 IRTemp irt_sum = newTemp(Ity_I32);
11647 IRTemp irt_diff = newTemp(Ity_I32);
11649 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11650 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11652 assign( irt_sum,
11653 binop( Iop_Add32,
11654 unop( Iop_16Uto32,
11655 unop( Iop_32to16, mkexpr(irt_regN) )
11657 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11659 assign( irt_diff,
11660 binop( Iop_Sub32,
11661 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11662 unop( Iop_16Uto32,
11663 unop( Iop_32to16, mkexpr(irt_regM) )
11668 IRExpr* ire_result
11669 = binop( Iop_Or32,
11670 binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11671 binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11673 IRTemp ge10 = newTemp(Ity_I32);
11674 assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
11675 mkU32(0x10000), mkexpr(irt_sum) ),
11676 mkU32(1), mkU32(0) ) );
11677 put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
11678 put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
11680 IRTemp ge32 = newTemp(Ity_I32);
11681 assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11682 put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11683 put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11685 if (isT)
11686 putIRegT( regD, ire_result, condT );
11687 else
11688 putIRegA( regD, ire_result, condT, Ijk_Boring );
11690 DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11691 return True;
11693 /* fall through */
11696 /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11698 UInt regD = 99, regN = 99, regM = 99;
11699 Bool gate = False;
11701 if (isT) {
11702 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11703 regN = INSNT0(3,0);
11704 regD = INSNT1(11,8);
11705 regM = INSNT1(3,0);
11706 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11707 gate = True;
11709 } else {
11710 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11711 INSNA(11,8) == BITS4(1,1,1,1) &&
11712 INSNA(7,4) == BITS4(0,0,1,1)) {
11713 regD = INSNA(15,12);
11714 regN = INSNA(19,16);
11715 regM = INSNA(3,0);
11716 if (regD != 15 && regN != 15 && regM != 15)
11717 gate = True;
11721 if (gate) {
11722 IRTemp irt_regN = newTemp(Ity_I32);
11723 IRTemp irt_regM = newTemp(Ity_I32);
11724 IRTemp irt_sum = newTemp(Ity_I32);
11725 IRTemp irt_diff = newTemp(Ity_I32);
11727 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11728 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11730 assign( irt_diff,
11731 binop( Iop_Sub32,
11732 unop( Iop_16Uto32,
11733 unop( Iop_32to16, mkexpr(irt_regN) )
11735 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11737 assign( irt_sum,
11738 binop( Iop_Add32,
11739 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11740 unop( Iop_16Uto32,
11741 unop( Iop_32to16, mkexpr(irt_regM) )
11742 ) ) );
11744 IRExpr* ire_result
11745 = binop( Iop_Or32,
11746 binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
11747 binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
11749 IRTemp ge10 = newTemp(Ity_I32);
11750 assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
11751 put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11752 put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11754 IRTemp ge32 = newTemp(Ity_I32);
11755 assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
11756 mkU32(0x10000), mkexpr(irt_sum) ),
11757 mkU32(1), mkU32(0) ) );
11758 put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
11759 put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
11761 if (isT)
11762 putIRegT( regD, ire_result, condT );
11763 else
11764 putIRegA( regD, ire_result, condT, Ijk_Boring );
11766 DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11767 return True;
11769 /* fall through */
11772 /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
11774 UInt regD = 99, regN = 99, regM = 99;
11775 Bool gate = False;
11777 if (isT) {
11778 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
11779 regN = INSNT0(3,0);
11780 regD = INSNT1(11,8);
11781 regM = INSNT1(3,0);
11782 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11783 gate = True;
11785 } else {
11786 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
11787 INSNA(11,8) == BITS4(1,1,1,1) &&
11788 INSNA(7,4) == BITS4(0,1,0,1)) {
11789 regD = INSNA(15,12);
11790 regN = INSNA(19,16);
11791 regM = INSNA(3,0);
11792 if (regD != 15 && regN != 15 && regM != 15)
11793 gate = True;
11797 if (gate) {
11798 IRTemp irt_regN = newTemp(Ity_I32);
11799 IRTemp irt_regM = newTemp(Ity_I32);
11800 IRTemp irt_sum = newTemp(Ity_I32);
11801 IRTemp irt_diff = newTemp(Ity_I32);
11803 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11804 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11806 assign( irt_sum,
11807 binop( Iop_Add32,
11808 binop( Iop_Sar32,
11809 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11810 mkU8(16) ),
11811 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
11813 assign( irt_diff,
11814 binop( Iop_Sub32,
11815 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
11816 binop( Iop_Sar32,
11817 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11818 mkU8(16) ) ) );
11820 IRExpr* ire_result
11821 = binop( Iop_Or32,
11822 binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11823 binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11825 IRTemp ge10 = newTemp(Ity_I32);
11826 assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
11827 put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11828 put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11830 IRTemp ge32 = newTemp(Ity_I32);
11831 assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11832 put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11833 put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11835 if (isT)
11836 putIRegT( regD, ire_result, condT );
11837 else
11838 putIRegA( regD, ire_result, condT, Ijk_Boring );
11840 DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11841 return True;
11843 /* fall through */
11846 /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11848 UInt regD = 99, regN = 99, regM = 99;
11849 Bool gate = False;
11851 if (isT) {
11852 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11853 regN = INSNT0(3,0);
11854 regD = INSNT1(11,8);
11855 regM = INSNT1(3,0);
11856 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11857 gate = True;
11859 } else {
11860 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11861 INSNA(11,8) == BITS4(1,1,1,1) &&
11862 INSNA(7,4) == BITS4(1,1,1,1)) {
11863 regD = INSNA(15,12);
11864 regN = INSNA(19,16);
11865 regM = INSNA(3,0);
11866 if (regD != 15 && regN != 15 && regM != 15)
11867 gate = True;
11871 if (gate) {
11872 IRTemp rNt = newTemp(Ity_I32);
11873 IRTemp rMt = newTemp(Ity_I32);
11874 IRTemp res_q = newTemp(Ity_I32);
11876 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11877 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11879 assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
11880 if (isT)
11881 putIRegT( regD, mkexpr(res_q), condT );
11882 else
11883 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11885 DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11886 return True;
11888 /* fall through */
11891 /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
11893 UInt regD = 99, regN = 99, regM = 99, rotate = 99;
11894 Bool gate = False;
11896 if (isT) {
11897 if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
11898 regN = INSNT0(3,0);
11899 regD = INSNT1(11,8);
11900 regM = INSNT1(3,0);
11901 rotate = INSNT1(5,4);
11902 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11903 gate = True;
11905 } else {
11906 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
11907 INSNA(9,4) == BITS6(0,0,0,1,1,1) ) {
11908 regD = INSNA(15,12);
11909 regN = INSNA(19,16);
11910 regM = INSNA(3,0);
11911 rotate = INSNA(11,10);
11912 if (regD != 15 && regN != 15 && regM != 15)
11913 gate = True;
11917 if (gate) {
11918 IRTemp irt_regN = newTemp(Ity_I32);
11919 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11921 IRTemp irt_regM = newTemp(Ity_I32);
11922 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11924 IRTemp irt_rot = newTemp(Ity_I32);
11925 assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
11927 /* FIXME Maybe we can write this arithmetic in shorter form. */
11928 IRExpr* resLo
11929 = binop(Iop_And32,
11930 binop(Iop_Add32,
11931 mkexpr(irt_regN),
11932 unop(Iop_16Uto32,
11933 unop(Iop_8Sto16,
11934 unop(Iop_32to8, mkexpr(irt_rot))))),
11935 mkU32(0x0000FFFF));
11937 IRExpr* resHi
11938 = binop(Iop_And32,
11939 binop(Iop_Add32,
11940 mkexpr(irt_regN),
11941 binop(Iop_Shl32,
11942 unop(Iop_16Uto32,
11943 unop(Iop_8Sto16,
11944 unop(Iop_32to8,
11945 binop(Iop_Shr32,
11946 mkexpr(irt_rot),
11947 mkU8(16))))),
11948 mkU8(16))),
11949 mkU32(0xFFFF0000));
11951 IRExpr* ire_result
11952 = binop( Iop_Or32, resHi, resLo );
11954 if (isT)
11955 putIRegT( regD, ire_result, condT );
11956 else
11957 putIRegA( regD, ire_result, condT, Ijk_Boring );
11959 DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
11960 nCC(conq), regD, regN, regM, 8 * rotate );
11961 return True;
11963 /* fall through */
11966 /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11968 UInt regD = 99, regN = 99, regM = 99;
11969 Bool gate = False;
11971 if (isT) {
11972 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11973 regN = INSNT0(3,0);
11974 regD = INSNT1(11,8);
11975 regM = INSNT1(3,0);
11976 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11977 gate = True;
11979 } else {
11980 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11981 INSNA(11,8) == BITS4(1,1,1,1) &&
11982 INSNA(7,4) == BITS4(0,0,1,1)) {
11983 regD = INSNA(15,12);
11984 regN = INSNA(19,16);
11985 regM = INSNA(3,0);
11986 if (regD != 15 && regN != 15 && regM != 15)
11987 gate = True;
11991 if (gate) {
11992 IRTemp rNt = newTemp(Ity_I32);
11993 IRTemp rMt = newTemp(Ity_I32);
11994 IRTemp irt_diff = newTemp(Ity_I32);
11995 IRTemp irt_sum = newTemp(Ity_I32);
11996 IRTemp res_q = newTemp(Ity_I32);
11998 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11999 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12001 assign( irt_diff,
12002 binop(Iop_Sub32,
12003 unop(Iop_16Sto32,
12004 unop(Iop_32to16,
12005 mkexpr(rNt)
12008 unop(Iop_16Sto32,
12009 unop(Iop_32to16,
12010 binop(Iop_Shr32,
12011 mkexpr(rMt), mkU8(16)
12018 assign( irt_sum,
12019 binop(Iop_Add32,
12020 unop(Iop_16Sto32,
12021 unop(Iop_32to16,
12022 binop(Iop_Shr32,
12023 mkexpr(rNt), mkU8(16)
12027 unop(Iop_16Sto32,
12028 unop(Iop_32to16, mkexpr(rMt)
12034 assign( res_q,
12035 binop(Iop_Or32,
12036 unop(Iop_16Uto32,
12037 unop(Iop_32to16,
12038 binop(Iop_Shr32,
12039 mkexpr(irt_diff), mkU8(1)
12043 binop(Iop_Shl32,
12044 binop(Iop_Shr32,
12045 mkexpr(irt_sum), mkU8(1)
12047 mkU8(16)
12052 if (isT)
12053 putIRegT( regD, mkexpr(res_q), condT );
12054 else
12055 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12057 DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12058 return True;
12060 /* fall through */
12063 /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
12065 UInt regD = 99, regN = 99, regM = 99;
12066 Bool gate = False;
12068 if (isT) {
12069 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12070 regN = INSNT0(3,0);
12071 regD = INSNT1(11,8);
12072 regM = INSNT1(3,0);
12073 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12074 gate = True;
12076 } else {
12077 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12078 INSNA(11,8) == BITS4(1,1,1,1) &&
12079 INSNA(7,4) == BITS4(0,0,1,1)) {
12080 regD = INSNA(15,12);
12081 regN = INSNA(19,16);
12082 regM = INSNA(3,0);
12083 if (regD != 15 && regN != 15 && regM != 15)
12084 gate = True;
12088 if (gate) {
12089 IRTemp rNt = newTemp(Ity_I32);
12090 IRTemp rMt = newTemp(Ity_I32);
12091 IRTemp irt_diff = newTemp(Ity_I32);
12092 IRTemp irt_sum = newTemp(Ity_I32);
12093 IRTemp res_q = newTemp(Ity_I32);
12095 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12096 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12098 assign( irt_diff,
12099 binop(Iop_Sub32,
12100 unop(Iop_16Uto32,
12101 unop(Iop_32to16,
12102 mkexpr(rNt)
12105 unop(Iop_16Uto32,
12106 unop(Iop_32to16,
12107 binop(Iop_Shr32,
12108 mkexpr(rMt), mkU8(16)
12115 assign( irt_sum,
12116 binop(Iop_Add32,
12117 unop(Iop_16Uto32,
12118 unop(Iop_32to16,
12119 binop(Iop_Shr32,
12120 mkexpr(rNt), mkU8(16)
12124 unop(Iop_16Uto32,
12125 unop(Iop_32to16, mkexpr(rMt)
12131 assign( res_q,
12132 binop(Iop_Or32,
12133 unop(Iop_16Uto32,
12134 unop(Iop_32to16,
12135 binop(Iop_Shr32,
12136 mkexpr(irt_diff), mkU8(1)
12140 binop(Iop_Shl32,
12141 binop(Iop_Shr32,
12142 mkexpr(irt_sum), mkU8(1)
12144 mkU8(16)
12149 if (isT)
12150 putIRegT( regD, mkexpr(res_q), condT );
12151 else
12152 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12154 DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12155 return True;
12157 /* fall through */
12160 /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12162 UInt regD = 99, regN = 99, regM = 99;
12163 Bool gate = False;
12165 if (isT) {
12166 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12167 regN = INSNT0(3,0);
12168 regD = INSNT1(11,8);
12169 regM = INSNT1(3,0);
12170 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12171 gate = True;
12173 } else {
12174 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12175 INSNA(11,8) == BITS4(1,1,1,1) &&
12176 INSNA(7,4) == BITS4(0,1,0,1)) {
12177 regD = INSNA(15,12);
12178 regN = INSNA(19,16);
12179 regM = INSNA(3,0);
12180 if (regD != 15 && regN != 15 && regM != 15)
12181 gate = True;
12185 if (gate) {
12186 IRTemp rNt = newTemp(Ity_I32);
12187 IRTemp rMt = newTemp(Ity_I32);
12188 IRTemp irt_diff = newTemp(Ity_I32);
12189 IRTemp irt_sum = newTemp(Ity_I32);
12190 IRTemp res_q = newTemp(Ity_I32);
12192 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12193 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12195 assign( irt_sum,
12196 binop(Iop_Add32,
12197 unop(Iop_16Sto32,
12198 unop(Iop_32to16,
12199 mkexpr(rNt)
12202 unop(Iop_16Sto32,
12203 unop(Iop_32to16,
12204 binop(Iop_Shr32,
12205 mkexpr(rMt), mkU8(16)
12212 assign( irt_diff,
12213 binop(Iop_Sub32,
12214 unop(Iop_16Sto32,
12215 unop(Iop_32to16,
12216 binop(Iop_Shr32,
12217 mkexpr(rNt), mkU8(16)
12221 unop(Iop_16Sto32,
12222 unop(Iop_32to16, mkexpr(rMt)
12228 assign( res_q,
12229 binop(Iop_Or32,
12230 unop(Iop_16Uto32,
12231 unop(Iop_32to16,
12232 binop(Iop_Shr32,
12233 mkexpr(irt_sum), mkU8(1)
12237 binop(Iop_Shl32,
12238 binop(Iop_Shr32,
12239 mkexpr(irt_diff), mkU8(1)
12241 mkU8(16)
12246 if (isT)
12247 putIRegT( regD, mkexpr(res_q), condT );
12248 else
12249 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12251 DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12252 return True;
12254 /* fall through */
12257 /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12259 UInt regD = 99, regN = 99, regM = 99;
12260 Bool gate = False;
12262 if (isT) {
12263 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12264 regN = INSNT0(3,0);
12265 regD = INSNT1(11,8);
12266 regM = INSNT1(3,0);
12267 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12268 gate = True;
12270 } else {
12271 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12272 INSNA(11,8) == BITS4(1,1,1,1) &&
12273 INSNA(7,4) == BITS4(0,1,0,1)) {
12274 regD = INSNA(15,12);
12275 regN = INSNA(19,16);
12276 regM = INSNA(3,0);
12277 if (regD != 15 && regN != 15 && regM != 15)
12278 gate = True;
12282 if (gate) {
12283 IRTemp rNt = newTemp(Ity_I32);
12284 IRTemp rMt = newTemp(Ity_I32);
12285 IRTemp irt_diff = newTemp(Ity_I32);
12286 IRTemp irt_sum = newTemp(Ity_I32);
12287 IRTemp res_q = newTemp(Ity_I32);
12289 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12290 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12292 assign( irt_sum,
12293 binop(Iop_Add32,
12294 unop(Iop_16Uto32,
12295 unop(Iop_32to16,
12296 mkexpr(rNt)
12299 unop(Iop_16Uto32,
12300 unop(Iop_32to16,
12301 binop(Iop_Shr32,
12302 mkexpr(rMt), mkU8(16)
12309 assign( irt_diff,
12310 binop(Iop_Sub32,
12311 unop(Iop_16Uto32,
12312 unop(Iop_32to16,
12313 binop(Iop_Shr32,
12314 mkexpr(rNt), mkU8(16)
12318 unop(Iop_16Uto32,
12319 unop(Iop_32to16, mkexpr(rMt)
12325 assign( res_q,
12326 binop(Iop_Or32,
12327 unop(Iop_16Uto32,
12328 unop(Iop_32to16,
12329 binop(Iop_Shr32,
12330 mkexpr(irt_sum), mkU8(1)
12334 binop(Iop_Shl32,
12335 binop(Iop_Shr32,
12336 mkexpr(irt_diff), mkU8(1)
12338 mkU8(16)
12343 if (isT)
12344 putIRegT( regD, mkexpr(res_q), condT );
12345 else
12346 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12348 DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12349 return True;
12351 /* fall through */
12354 /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
12356 UInt regD = 99, regN = 99, regM = 99;
12357 Bool gate = False;
12359 if (isT) {
12360 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12361 regN = INSNT0(3,0);
12362 regD = INSNT1(11,8);
12363 regM = INSNT1(3,0);
12364 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12365 gate = True;
12367 } else {
12368 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12369 INSNA(11,8) == BITS4(1,1,1,1) &&
12370 INSNA(7,4) == BITS4(0,1,1,1)) {
12371 regD = INSNA(15,12);
12372 regN = INSNA(19,16);
12373 regM = INSNA(3,0);
12374 if (regD != 15 && regN != 15 && regM != 15)
12375 gate = True;
12379 if (gate) {
12380 IRTemp rNt = newTemp(Ity_I32);
12381 IRTemp rMt = newTemp(Ity_I32);
12382 IRTemp res_q = newTemp(Ity_I32);
12384 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12385 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12387 assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
12388 if (isT)
12389 putIRegT( regD, mkexpr(res_q), condT );
12390 else
12391 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12393 DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12394 return True;
12396 /* fall through */
12399 /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
12401 UInt rD = 99, rN = 99, rM = 99, rA = 99;
12402 Bool round = False;
12403 Bool gate = False;
12405 if (isT) {
12406 if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
12407 && INSNT0(6,4) == BITS3(1,1,0)
12408 && INSNT1(7,5) == BITS3(0,0,0)) {
12409 round = INSNT1(4,4);
12410 rA = INSNT1(15,12);
12411 rD = INSNT1(11,8);
12412 rM = INSNT1(3,0);
12413 rN = INSNT0(3,0);
12414 if (!isBadRegT(rD)
12415 && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
12416 gate = True;
12418 } else {
12419 if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
12420 && INSNA(15,12) != BITS4(1,1,1,1)
12421 && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
12422 round = INSNA(5,5);
12423 rD = INSNA(19,16);
12424 rA = INSNA(15,12);
12425 rM = INSNA(11,8);
12426 rN = INSNA(3,0);
12427 if (rD != 15 && rM != 15 && rN != 15)
12428 gate = True;
12431 if (gate) {
12432 IRTemp irt_rA = newTemp(Ity_I32);
12433 IRTemp irt_rN = newTemp(Ity_I32);
12434 IRTemp irt_rM = newTemp(Ity_I32);
12435 assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
12436 assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12437 assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12438 IRExpr* res
12439 = unop(Iop_64HIto32,
12440 binop(Iop_Add64,
12441 binop(Iop_Sub64,
12442 binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
12443 binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
12444 mkU64(round ? 0x80000000ULL : 0ULL)));
12445 if (isT)
12446 putIRegT( rD, res, condT );
12447 else
12448 putIRegA(rD, res, condT, Ijk_Boring);
12449 DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
12450 round ? "r" : "", nCC(conq), rD, rN, rM, rA);
12451 return True;
12453 /* fall through */
12456 /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12458 UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12459 Bool m_swap = False;
12460 Bool gate = False;
12462 if (isT) {
12463 if (INSNT0(15,4) == 0xFBC &&
12464 (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
12465 rN = INSNT0(3,0);
12466 rDlo = INSNT1(15,12);
12467 rDhi = INSNT1(11,8);
12468 rM = INSNT1(3,0);
12469 m_swap = (INSNT1(4,4) & 1) == 1;
12470 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
12471 && !isBadRegT(rM) && rDhi != rDlo)
12472 gate = True;
12474 } else {
12475 if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
12476 && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
12477 rN = INSNA(3,0);
12478 rDlo = INSNA(15,12);
12479 rDhi = INSNA(19,16);
12480 rM = INSNA(11,8);
12481 m_swap = ( INSNA(5,5) & 1 ) == 1;
12482 if (rDlo != 15 && rDhi != 15
12483 && rN != 15 && rM != 15 && rDlo != rDhi)
12484 gate = True;
12488 if (gate) {
12489 IRTemp irt_rM = newTemp(Ity_I32);
12490 IRTemp irt_rN = newTemp(Ity_I32);
12491 IRTemp irt_rDhi = newTemp(Ity_I32);
12492 IRTemp irt_rDlo = newTemp(Ity_I32);
12493 IRTemp op_2 = newTemp(Ity_I32);
12494 IRTemp pr_1 = newTemp(Ity_I64);
12495 IRTemp pr_2 = newTemp(Ity_I64);
12496 IRTemp result = newTemp(Ity_I64);
12497 IRTemp resHi = newTemp(Ity_I32);
12498 IRTemp resLo = newTemp(Ity_I32);
12499 assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
12500 assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
12501 assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
12502 assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
12503 assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12504 assign( pr_1, binop(Iop_MullS32,
12505 unop(Iop_16Sto32,
12506 unop(Iop_32to16, mkexpr(irt_rN))
12508 unop(Iop_16Sto32,
12509 unop(Iop_32to16, mkexpr(op_2))
12513 assign( pr_2, binop(Iop_MullS32,
12514 binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12515 binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12518 assign( result, binop(Iop_Add64,
12519 binop(Iop_Add64,
12520 mkexpr(pr_1),
12521 mkexpr(pr_2)
12523 binop(Iop_32HLto64,
12524 mkexpr(irt_rDhi),
12525 mkexpr(irt_rDlo)
12529 assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12530 assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12531 if (isT) {
12532 putIRegT( rDhi, mkexpr(resHi), condT );
12533 putIRegT( rDlo, mkexpr(resLo), condT );
12534 } else {
12535 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12536 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12538 DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
12539 m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12540 return True;
12542 /* fall through */
12545 /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12547 UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12548 Bool m_swap = False;
12549 Bool gate = False;
12551 if (isT) {
12552 if ((INSNT0(15,4) == 0xFBD &&
12553 (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
12554 rN = INSNT0(3,0);
12555 rDlo = INSNT1(15,12);
12556 rDhi = INSNT1(11,8);
12557 rM = INSNT1(3,0);
12558 m_swap = (INSNT1(4,4) & 1) == 1;
12559 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
12560 !isBadRegT(rM) && rDhi != rDlo)
12561 gate = True;
12563 } else {
12564 if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
12565 (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
12566 rN = INSNA(3,0);
12567 rDlo = INSNA(15,12);
12568 rDhi = INSNA(19,16);
12569 rM = INSNA(11,8);
12570 m_swap = (INSNA(5,5) & 1) == 1;
12571 if (rDlo != 15 && rDhi != 15 &&
12572 rN != 15 && rM != 15 && rDlo != rDhi)
12573 gate = True;
12576 if (gate) {
12577 IRTemp irt_rM = newTemp(Ity_I32);
12578 IRTemp irt_rN = newTemp(Ity_I32);
12579 IRTemp irt_rDhi = newTemp(Ity_I32);
12580 IRTemp irt_rDlo = newTemp(Ity_I32);
12581 IRTemp op_2 = newTemp(Ity_I32);
12582 IRTemp pr_1 = newTemp(Ity_I64);
12583 IRTemp pr_2 = newTemp(Ity_I64);
12584 IRTemp result = newTemp(Ity_I64);
12585 IRTemp resHi = newTemp(Ity_I32);
12586 IRTemp resLo = newTemp(Ity_I32);
12587 assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12588 assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12589 assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
12590 assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
12591 assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12592 assign( pr_1, binop(Iop_MullS32,
12593 unop(Iop_16Sto32,
12594 unop(Iop_32to16, mkexpr(irt_rN))
12596 unop(Iop_16Sto32,
12597 unop(Iop_32to16, mkexpr(op_2))
12601 assign( pr_2, binop(Iop_MullS32,
12602 binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12603 binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12606 assign( result, binop(Iop_Add64,
12607 binop(Iop_Sub64,
12608 mkexpr(pr_1),
12609 mkexpr(pr_2)
12611 binop(Iop_32HLto64,
12612 mkexpr(irt_rDhi),
12613 mkexpr(irt_rDlo)
12617 assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12618 assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12619 if (isT) {
12620 putIRegT( rDhi, mkexpr(resHi), condT );
12621 putIRegT( rDlo, mkexpr(resLo), condT );
12622 } else {
12623 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12624 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12626 DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
12627 m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12628 return True;
12630 /* fall through */
12633 /* ---------- Doesn't match anything. ---------- */
12634 return False;
12636 # undef INSNA
12637 # undef INSNT0
12638 # undef INSNT1
12642 /*------------------------------------------------------------*/
12643 /*--- V8 instructions ---*/
12644 /*------------------------------------------------------------*/
12646 /* Break a V128-bit value up into four 32-bit ints. */
12648 static void breakupV128to32s ( IRTemp t128,
12649 /*OUTs*/
12650 IRTemp* t3, IRTemp* t2,
12651 IRTemp* t1, IRTemp* t0 )
12653 IRTemp hi64 = newTemp(Ity_I64);
12654 IRTemp lo64 = newTemp(Ity_I64);
12655 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
12656 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
12658 vassert(t0 && *t0 == IRTemp_INVALID);
12659 vassert(t1 && *t1 == IRTemp_INVALID);
12660 vassert(t2 && *t2 == IRTemp_INVALID);
12661 vassert(t3 && *t3 == IRTemp_INVALID);
12663 *t0 = newTemp(Ity_I32);
12664 *t1 = newTemp(Ity_I32);
12665 *t2 = newTemp(Ity_I32);
12666 *t3 = newTemp(Ity_I32);
12667 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
12668 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
12669 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
12670 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
12674 /* Both ARM and Thumb */
12676 /* Translate a V8 instruction. If successful, returns True and *dres
12677 may or may not be updated. If unsuccessful, returns False and
12678 doesn't change *dres nor create any IR.
12680 The Thumb and ARM encodings are potentially different. In both
12681 ARM and Thumb mode, the caller must pass the entire 32 bits of
12682 the instruction. Callers may pass any instruction; this function
12683 ignores anything it doesn't recognise.
12685 Caller must supply an IRTemp 'condT' holding the gating condition,
12686 or IRTemp_INVALID indicating the insn is always executed.
12688 If we are decoding an ARM instruction which is in the NV space
12689 then it is expected that condT will be IRTemp_INVALID, and that is
12690 asserted for. That condition is ensured by the logic near the top
12691 of disInstr_ARM_WRK, that sets up condT.
12693 When decoding for Thumb, the caller must pass the ITState pre/post
12694 this instruction, so that we can generate a SIGILL in the cases where
12695 the instruction may not be in an IT block. When decoding for ARM,
12696 both of these must be IRTemp_INVALID.
12698 Finally, the caller must indicate whether this occurs in ARM or in
12699 Thumb code.
12701 static Bool decode_V8_instruction (
12702 /*MOD*/DisResult* dres,
12703 UInt insnv8,
12704 IRTemp condT,
12705 Bool isT,
12706 IRTemp old_itstate,
12707 IRTemp new_itstate
12710 # define INSN(_bMax,_bMin) SLICE_UInt(insnv8, (_bMax), (_bMin))
12712 if (isT) {
12713 vassert(old_itstate != IRTemp_INVALID);
12714 vassert(new_itstate != IRTemp_INVALID);
12715 } else {
12716 vassert(old_itstate == IRTemp_INVALID);
12717 vassert(new_itstate == IRTemp_INVALID);
12720 /* ARMCondcode 'conq' is only used for debug printing and for no other
12721 purpose. For ARM, this is simply the top 4 bits of the instruction.
12722 For Thumb, the condition is not (really) known until run time, and so
12723 we set it to ARMCondAL in order that printing of these instructions
12724 does not show any condition. */
12725 ARMCondcode conq;
12726 if (isT) {
12727 conq = ARMCondAL;
12728 } else {
12729 conq = (ARMCondcode)INSN(31,28);
12730 if (conq == ARMCondNV || conq == ARMCondAL) {
12731 vassert(condT == IRTemp_INVALID);
12732 } else {
12733 vassert(condT != IRTemp_INVALID);
12735 vassert(conq >= ARMCondEQ && conq <= ARMCondNV);
12738 /* ----------- {AESD, AESE, AESMC, AESIMC}.8 q_q ----------- */
12739 /* 31 27 23 21 19 17 15 11 7 3
12740 T1: 1111 1111 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
12741 A1: 1111 0011 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
12743 T1: 1111 1111 1 D 11 sz 00 d 0011 01 M 0 m AESD Qd, Qm
12744 A1: 1111 0011 1 D 11 sz 00 d 0011 01 M 0 m AESD Qd, Qm
12746 T1: 1111 1111 1 D 11 sz 00 d 0011 10 M 0 m AESMC Qd, Qm
12747 A1: 1111 0011 1 D 11 sz 00 d 0011 10 M 0 m AESMC Qd, Qm
12749 T1: 1111 1111 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
12750 A1: 1111 0011 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
12752 sz must be 00
12753 ARM encoding is in NV space.
12754 In Thumb mode, we must not be in an IT block.
12757 UInt regD = 99, regM = 99, opc = 4/*invalid*/;
12758 Bool gate = True;
12760 UInt high9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
12761 if (INSN(31,23) == high9 && INSN(21,16) == BITS6(1,1,0,0,0,0)
12762 && INSN(11,8) == BITS4(0,0,1,1) && INSN(4,4) == 0) {
12763 UInt bitD = INSN(22,22);
12764 UInt fldD = INSN(15,12);
12765 UInt bitM = INSN(5,5);
12766 UInt fldM = INSN(3,0);
12767 opc = INSN(7,6);
12768 regD = (bitD << 4) | fldD;
12769 regM = (bitM << 4) | fldM;
12771 if ((regD & 1) == 1 || (regM & 1) == 1)
12772 gate = False;
12774 if (gate) {
12775 if (isT) {
12776 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
12778 /* In ARM mode, this is statically unconditional. In Thumb mode,
12779 this must be dynamically unconditional, and we've SIGILLd if not.
12780 In either case we can create unconditional IR. */
12781 IRTemp op1 = newTemp(Ity_V128);
12782 IRTemp op2 = newTemp(Ity_V128);
12783 IRTemp src = newTemp(Ity_V128);
12784 IRTemp res = newTemp(Ity_V128);
12785 assign(op1, getQReg(regD >> 1));
12786 assign(op2, getQReg(regM >> 1));
12787 assign(src, opc == BITS2(0,0) || opc == BITS2(0,1)
12788 ? binop(Iop_XorV128, mkexpr(op1), mkexpr(op2))
12789 : mkexpr(op2));
12791 void* helpers[4]
12792 = { &armg_dirtyhelper_AESE, &armg_dirtyhelper_AESD,
12793 &armg_dirtyhelper_AESMC, &armg_dirtyhelper_AESIMC };
12794 const HChar* hNames[4]
12795 = { "armg_dirtyhelper_AESE", "armg_dirtyhelper_AESD",
12796 "armg_dirtyhelper_AESMC", "armg_dirtyhelper_AESIMC" };
12797 const HChar* iNames[4]
12798 = { "aese", "aesd", "aesmc", "aesimc" };
12800 vassert(opc >= 0 && opc <= 3);
12801 void* helper = helpers[opc];
12802 const HChar* hname = hNames[opc];
12804 IRTemp w32_3, w32_2, w32_1, w32_0;
12805 w32_3 = w32_2 = w32_1 = w32_0 = IRTemp_INVALID;
12806 breakupV128to32s( src, &w32_3, &w32_2, &w32_1, &w32_0 );
12808 IRDirty* di
12809 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
12810 mkIRExprVec_5(
12811 IRExpr_VECRET(),
12812 mkexpr(w32_3), mkexpr(w32_2),
12813 mkexpr(w32_1), mkexpr(w32_0)) );
12814 stmt(IRStmt_Dirty(di));
12816 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
12817 DIP("%s.8 q%u, q%u\n", iNames[opc], regD >> 1, regM >> 1);
12818 return True;
12820 /* fall through */
12823 /* ----------- SHA 3-reg insns q_q_q ----------- */
12825 31 27 23 19 15 11 7 3
12826 T1: 1110 1111 0 D 00 n d 1100 N Q M 0 m SHA1C Qd, Qn, Qm ix=0
12827 A1: 1111 0010 ----------------------------
12829 T1: 1110 1111 0 D 01 n d 1100 N Q M 0 m SHA1P Qd, Qn, Qm ix=1
12830 A1: 1111 0010 ----------------------------
12832 T1: 1110 1111 0 D 10 n d 1100 N Q M 0 m SHA1M Qd, Qn, Qm ix=2
12833 A1: 1111 0010 ----------------------------
12835 T1: 1110 1111 0 D 11 n d 1100 N Q M 0 m SHA1SU0 Qd, Qn, Qm ix=3
12836 A1: 1111 0010 ----------------------------
12837 (that's a complete set of 4, based on insn[21,20])
12839 T1: 1111 1111 0 D 00 n d 1100 N Q M 0 m SHA256H Qd, Qn, Qm ix=4
12840 A1: 1111 0011 ----------------------------
12842 T1: 1111 1111 0 D 01 n d 1100 N Q M 0 m SHA256H2 Qd, Qn, Qm ix=5
12843 A1: 1111 0011 ----------------------------
12845 T1: 1111 1111 0 D 10 n d 1100 N Q M 0 m SHA256SU1 Qd, Qn, Qm ix=6
12846 A1: 1111 0011 ----------------------------
12847 (3/4 of a complete set of 4, based on insn[21,20])
12849 Q must be 1. Same comments about conditionalisation as for the AES
12850 group above apply.
12853 UInt ix = 8; /* invalid */
12854 Bool gate = False;
12856 UInt hi9_sha1 = isT ? BITS9(1,1,1,0,1,1,1,1,0)
12857 : BITS9(1,1,1,1,0,0,1,0,0);
12858 UInt hi9_sha256 = isT ? BITS9(1,1,1,1,1,1,1,1,0)
12859 : BITS9(1,1,1,1,0,0,1,1,0);
12860 if ((INSN(31,23) == hi9_sha1 || INSN(31,23) == hi9_sha256)
12861 && INSN(11,8) == BITS4(1,1,0,0)
12862 && INSN(6,6) == 1 && INSN(4,4) == 0) {
12863 ix = INSN(21,20);
12864 if (INSN(31,23) == hi9_sha256)
12865 ix |= 4;
12866 if (ix < 7)
12867 gate = True;
12870 UInt regN = (INSN(7,7) << 4) | INSN(19,16);
12871 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
12872 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
12873 if ((regD & 1) == 1 || (regM & 1) == 1 || (regN & 1) == 1)
12874 gate = False;
12876 if (gate) {
12877 vassert(ix >= 0 && ix < 7);
12878 const HChar* inames[7]
12879 = { "sha1c", "sha1p", "sha1m", "sha1su0",
12880 "sha256h", "sha256h2", "sha256su1" };
12881 void(*helpers[7])(V128*,UInt,UInt,UInt,UInt,UInt,UInt,
12882 UInt,UInt,UInt,UInt,UInt,UInt)
12883 = { &armg_dirtyhelper_SHA1C, &armg_dirtyhelper_SHA1P,
12884 &armg_dirtyhelper_SHA1M, &armg_dirtyhelper_SHA1SU0,
12885 &armg_dirtyhelper_SHA256H, &armg_dirtyhelper_SHA256H2,
12886 &armg_dirtyhelper_SHA256SU1 };
12887 const HChar* hnames[7]
12888 = { "armg_dirtyhelper_SHA1C", "armg_dirtyhelper_SHA1P",
12889 "armg_dirtyhelper_SHA1M", "armg_dirtyhelper_SHA1SU0",
12890 "armg_dirtyhelper_SHA256H", "armg_dirtyhelper_SHA256H2",
12891 "armg_dirtyhelper_SHA256SU1" };
12893 /* This is a really lame way to implement this, even worse than
12894 the arm64 version. But at least it works. */
12896 if (isT) {
12897 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
12900 IRTemp vD = newTemp(Ity_V128);
12901 IRTemp vN = newTemp(Ity_V128);
12902 IRTemp vM = newTemp(Ity_V128);
12903 assign(vD, getQReg(regD >> 1));
12904 assign(vN, getQReg(regN >> 1));
12905 assign(vM, getQReg(regM >> 1));
12907 IRTemp d32_3, d32_2, d32_1, d32_0;
12908 d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
12909 breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
12911 IRTemp n32_3_pre, n32_2_pre, n32_1_pre, n32_0_pre;
12912 n32_3_pre = n32_2_pre = n32_1_pre = n32_0_pre = IRTemp_INVALID;
12913 breakupV128to32s( vN, &n32_3_pre, &n32_2_pre, &n32_1_pre, &n32_0_pre );
12915 IRTemp m32_3, m32_2, m32_1, m32_0;
12916 m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
12917 breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
12919 IRTemp n32_3 = newTemp(Ity_I32);
12920 IRTemp n32_2 = newTemp(Ity_I32);
12921 IRTemp n32_1 = newTemp(Ity_I32);
12922 IRTemp n32_0 = newTemp(Ity_I32);
12924 /* Mask off any bits of the N register operand that aren't actually
12925 needed, so that Memcheck doesn't complain unnecessarily. */
12926 switch (ix) {
12927 case 0: case 1: case 2:
12928 assign(n32_3, mkU32(0));
12929 assign(n32_2, mkU32(0));
12930 assign(n32_1, mkU32(0));
12931 assign(n32_0, mkexpr(n32_0_pre));
12932 break;
12933 case 3: case 4: case 5: case 6:
12934 assign(n32_3, mkexpr(n32_3_pre));
12935 assign(n32_2, mkexpr(n32_2_pre));
12936 assign(n32_1, mkexpr(n32_1_pre));
12937 assign(n32_0, mkexpr(n32_0_pre));
12938 break;
12939 default:
12940 vassert(0);
12943 IRExpr** argvec
12944 = mkIRExprVec_13(
12945 IRExpr_VECRET(),
12946 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
12947 mkexpr(n32_3), mkexpr(n32_2), mkexpr(n32_1), mkexpr(n32_0),
12948 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
12951 IRTemp res = newTemp(Ity_V128);
12952 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
12953 hnames[ix], helpers[ix], argvec );
12954 stmt(IRStmt_Dirty(di));
12955 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
12957 DIP("%s.8 q%u, q%u, q%u\n",
12958 inames[ix], regD >> 1, regN >> 1, regM >> 1);
12959 return True;
12961 /* fall through */
12964 /* ----------- SHA1SU1, SHA256SU0 ----------- */
12966 31 27 23 21 19 15 11 7 3
12967 T1: 1111 1111 1 D 11 1010 d 0011 10 M 0 m SHA1SU1 Qd, Qm
12968 A1: 1111 0011 ----------------------------
12970 T1: 1111 1111 1 D 11 1010 d 0011 11 M 0 m SHA256SU0 Qd, Qm
12971 A1: 1111 0011 ----------------------------
12973 Same comments about conditionalisation as for the AES group above apply.
12976 Bool gate = False;
12978 UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
12979 if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,1,0)
12980 && INSN(11,7) == BITS5(0,0,1,1,1) && INSN(4,4) == 0) {
12981 gate = True;
12984 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
12985 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
12986 if ((regD & 1) == 1 || (regM & 1) == 1)
12987 gate = False;
12989 Bool is_1SU1 = INSN(6,6) == 0;
12991 if (gate) {
12992 const HChar* iname
12993 = is_1SU1 ? "sha1su1" : "sha256su0";
12994 void (*helper)(V128*,UInt,UInt,UInt,UInt,UInt,UInt,UInt,UInt)
12995 = is_1SU1 ? &armg_dirtyhelper_SHA1SU1
12996 : *armg_dirtyhelper_SHA256SU0;
12997 const HChar* hname
12998 = is_1SU1 ? "armg_dirtyhelper_SHA1SU1"
12999 : "armg_dirtyhelper_SHA256SU0";
13001 if (isT) {
13002 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13005 IRTemp vD = newTemp(Ity_V128);
13006 IRTemp vM = newTemp(Ity_V128);
13007 assign(vD, getQReg(regD >> 1));
13008 assign(vM, getQReg(regM >> 1));
13010 IRTemp d32_3, d32_2, d32_1, d32_0;
13011 d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
13012 breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
13014 IRTemp m32_3, m32_2, m32_1, m32_0;
13015 m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
13016 breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
13018 IRExpr** argvec
13019 = mkIRExprVec_9(
13020 IRExpr_VECRET(),
13021 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
13022 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
13025 IRTemp res = newTemp(Ity_V128);
13026 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13027 hname, helper, argvec );
13028 stmt(IRStmt_Dirty(di));
13029 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13031 DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
13032 return True;
13034 /* fall through */
13037 /* ----------- SHA1H ----------- */
13039 31 27 23 21 19 15 11 7 3
13040 T1: 1111 1111 1 D 11 1001 d 0010 11 M 0 m SHA1H Qd, Qm
13041 A1: 1111 0011 ----------------------------
13043 Same comments about conditionalisation as for the AES group above apply.
13046 Bool gate = False;
13048 UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
13049 if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,0,1)
13050 && INSN(11,6) == BITS6(0,0,1,0,1,1) && INSN(4,4) == 0) {
13051 gate = True;
13054 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
13055 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
13056 if ((regD & 1) == 1 || (regM & 1) == 1)
13057 gate = False;
13059 if (gate) {
13060 const HChar* iname = "sha1h";
13061 void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_SHA1H;
13062 const HChar* hname = "armg_dirtyhelper_SHA1H";
13064 if (isT) {
13065 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13068 IRTemp vM = newTemp(Ity_V128);
13069 assign(vM, getQReg(regM >> 1));
13071 IRTemp m32_3, m32_2, m32_1, m32_0;
13072 m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
13073 breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
13074 /* m32_3, m32_2, m32_1 are just abandoned. No harm; iropt will
13075 remove them. */
13077 IRExpr* zero = mkU32(0);
13078 IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
13079 zero, zero, zero, mkexpr(m32_0));
13081 IRTemp res = newTemp(Ity_V128);
13082 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13083 hname, helper, argvec );
13084 stmt(IRStmt_Dirty(di));
13085 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13087 DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
13088 return True;
13090 /* fall through */
13093 /* ----------- VMULL.P64 ----------- */
13095 31 27 23 21 19 15 11 7 3
13096 T2: 1110 1111 1 D 10 n d 1110 N 0 M 0 m
13097 A2: 1111 0010 -------------------------
13099 The ARM documentation is pretty difficult to follow here.
13100 Same comments about conditionalisation as for the AES group above apply.
13103 Bool gate = False;
13105 UInt hi9 = isT ? BITS9(1,1,1,0,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,0,1);
13106 if (INSN(31,23) == hi9 && INSN(21,20) == BITS2(1,0)
13107 && INSN(11,8) == BITS4(1,1,1,0)
13108 && INSN(6,6) == 0 && INSN(4,4) == 0) {
13109 gate = True;
13112 UInt regN = (INSN(7,7) << 4) | INSN(19,16);
13113 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
13114 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
13116 if ((regD & 1) == 1)
13117 gate = False;
13119 if (gate) {
13120 const HChar* iname = "vmull";
13121 void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_VMULLP64;
13122 const HChar* hname = "armg_dirtyhelper_VMULLP64";
13124 if (isT) {
13125 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13128 IRTemp srcN = newTemp(Ity_I64);
13129 IRTemp srcM = newTemp(Ity_I64);
13130 assign(srcN, getDRegI64(regN));
13131 assign(srcM, getDRegI64(regM));
13133 IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
13134 unop(Iop_64HIto32, mkexpr(srcN)),
13135 unop(Iop_64to32, mkexpr(srcN)),
13136 unop(Iop_64HIto32, mkexpr(srcM)),
13137 unop(Iop_64to32, mkexpr(srcM)));
13139 IRTemp res = newTemp(Ity_V128);
13140 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13141 hname, helper, argvec );
13142 stmt(IRStmt_Dirty(di));
13143 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13145 DIP("%s.p64 q%u, q%u, w%u\n", iname, regD >> 1, regN, regM);
13146 return True;
13148 /* fall through */
13151 /* ----------- LDA{,B,H}, STL{,B,H} ----------- */
13152 /* 31 27 23 19 15 11 7 3
13153 A1: cond 0001 1001 n t 1100 1001 1111 LDA Rt, [Rn]
13154 A1: cond 0001 1111 n t 1100 1001 1111 LDAH Rt, [Rn]
13155 A1: cond 0001 1101 n t 1100 1001 1111 LDAB Rt, [Rn]
13157 A1: cond 0001 1000 n 1111 1100 1001 t STL Rt, [Rn]
13158 A1: cond 0001 1110 n 1111 1100 1001 t STLH Rt, [Rn]
13159 A1: cond 0001 1100 n 1111 1100 1001 t STLB Rt, [Rn]
13161 T1: 1110 1000 1101 n t 1111 1010 1111 LDA Rt, [Rn]
13162 T1: 1110 1000 1101 n t 1111 1001 1111 LDAH Rt, [Rn]
13163 T1: 1110 1000 1101 n t 1111 1000 1111 LDAB Rt, [Rn]
13165 T1: 1110 1000 1100 n t 1111 1010 1111 STL Rt, [Rn]
13166 T1: 1110 1000 1100 n t 1111 1001 1111 STLH Rt, [Rn]
13167 T1: 1110 1000 1100 n t 1111 1000 1111 STLB Rt, [Rn]
13170 UInt nn = 16; // invalid
13171 UInt tt = 16; // invalid
13172 UInt szBlg2 = 4; // invalid
13173 Bool isLoad = False;
13174 Bool gate = False;
13175 if (isT) {
13176 if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
13177 && INSN(11,6) == BITS6(1,1,1,1,1,0)
13178 && INSN(3,0) == BITS4(1,1,1,1)) {
13179 nn = INSN(19,16);
13180 tt = INSN(15,12);
13181 isLoad = INSN(20,20) == 1;
13182 szBlg2 = INSN(5,4); // 00:B 01:H 10:W 11:invalid
13183 gate = szBlg2 != BITS2(1,1) && tt != 15 && nn != 15;
13185 } else {
13186 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
13187 && INSN(11,0) == BITS12(1,1,0,0,1,0,0,1,1,1,1,1)) {
13188 nn = INSN(19,16);
13189 tt = INSN(15,12);
13190 isLoad = True;
13191 szBlg2 = INSN(22,21); // 10:B 11:H 00:W 01:invalid
13192 gate = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
13194 else
13195 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
13196 && INSN(15,4) == BITS12(1,1,1,1,1,1,0,0,1,0,0,1)) {
13197 nn = INSN(19,16);
13198 tt = INSN(3,0);
13199 isLoad = False;
13200 szBlg2 = INSN(22,21); // 10:B 11:H 00:W 01:invalid
13201 gate = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
13203 if (gate) {
13204 // Rearrange szBlg2 bits to be the same as the Thumb case
13205 switch (szBlg2) {
13206 case 2: szBlg2 = 0; break;
13207 case 3: szBlg2 = 1; break;
13208 case 0: szBlg2 = 2; break;
13209 default: /*NOTREACHED*/vassert(0);
13213 // For both encodings, the instruction is guarded by condT, which
 13214 // is passed in by the caller. Note that the loads and stores
13215 // are conditional, so we don't have to truncate the IRSB at this
13216 // point, but the fence is unconditional. There's no way to
13217 // represent a conditional fence without a side exit, but it
13218 // doesn't matter from a correctness standpoint that it is
13219 // unconditional -- it just loses a bit of performance in the
13220 // case where the condition doesn't hold.
13221 if (gate) {
13222 vassert(szBlg2 <= 2 && nn <= 14 && tt <= 14);
13223 IRExpr* ea = llGetIReg(nn);
13224 if (isLoad) {
13225 static IRLoadGOp cvt[3]
13226 = { ILGop_8Uto32, ILGop_16Uto32, ILGop_Ident32 };
13227 IRTemp data = newTemp(Ity_I32);
13228 loadGuardedLE(data, cvt[szBlg2], ea, mkU32(0)/*alt*/, condT);
13229 if (isT) {
13230 putIRegT(tt, mkexpr(data), condT);
13231 } else {
13232 putIRegA(tt, mkexpr(data), condT, Ijk_INVALID);
13234 stmt(IRStmt_MBE(Imbe_Fence));
13235 } else {
13236 stmt(IRStmt_MBE(Imbe_Fence));
13237 IRExpr* data = llGetIReg(tt);
13238 switch (szBlg2) {
13239 case 0: data = unop(Iop_32to8, data); break;
13240 case 1: data = unop(Iop_32to16, data); break;
13241 case 2: break;
13242 default: vassert(0);
13244 storeGuardedLE(ea, data, condT);
13246 const HChar* ldNames[3] = { "ldab", "ldah", "lda" };
13247 const HChar* stNames[3] = { "stlb", "stlh", "stl" };
13248 DIP("%s r%u, [r%u]", (isLoad ? ldNames : stNames)[szBlg2], tt, nn);
13249 return True;
13251 /* else fall through */
13254 /* ----------- LDAEX{,B,H,D}, STLEX{,B,H,D} ----------- */
13255 /* 31 27 23 19 15 11 7 3
13256 A1: cond 0001 1101 n t 1110 1001 1111 LDAEXB Rt, [Rn]
13257 A1: cond 0001 1111 n t 1110 1001 1111 LDAEXH Rt, [Rn]
13258 A1: cond 0001 1001 n t 1110 1001 1111 LDAEX Rt, [Rn]
13259 A1: cond 0001 1011 n t 1110 1001 1111 LDAEXD Rt, Rt+1, [Rn]
13261 A1: cond 0001 1100 n d 1110 1001 t STLEXB Rd, Rt, [Rn]
13262 A1: cond 0001 1110 n d 1110 1001 t STLEXH Rd, Rt, [Rn]
13263 A1: cond 0001 1000 n d 1110 1001 t STLEX Rd, Rt, [Rn]
13264 A1: cond 0001 1010 n d 1110 1001 t STLEXD Rd, Rt, Rt+1, [Rn]
13266 31 28 24 19 15 11 7 3
13267 T1: 111 0100 01101 n t 1111 1100 1111 LDAEXB Rt, [Rn]
13268 T1: 111 0100 01101 n t 1111 1101 1111 LDAEXH Rt, [Rn]
13269 T1: 111 0100 01101 n t 1111 1110 1111 LDAEX Rt, [Rn]
13270 T1: 111 0100 01101 n t t2 1111 1111 LDAEXD Rt, Rt2, [Rn]
13272 T1: 111 0100 01100 n t 1111 1100 d STLEXB Rd, Rt, [Rn]
13273 T1: 111 0100 01100 n t 1111 1101 d STLEXH Rd, Rt, [Rn]
13274 T1: 111 0100 01100 n t 1111 1110 d STLEX Rd, Rt, [Rn]
13275 T1: 111 0100 01100 n t t2 1111 d STLEXD Rd, Rt, Rt2, [Rn]
13278 UInt nn = 16; // invalid
13279 UInt tt = 16; // invalid
13280 UInt tt2 = 16; // invalid
13281 UInt dd = 16; // invalid
13282 UInt szBlg2 = 4; // invalid
13283 Bool isLoad = False;
13284 Bool gate = False;
13285 if (isT) {
13286 if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
13287 && INSN(7,6) == BITS2(1,1)) {
13288 isLoad = INSN(20,20) == 1;
13289 nn = INSN(19,16);
13290 tt = INSN(15,12);
13291 tt2 = INSN(11,8);
13292 szBlg2 = INSN(5,4);
13293 dd = INSN(3,0);
13294 gate = True;
13295 if (szBlg2 < BITS2(1,1) && tt2 != BITS4(1,1,1,1)) gate = False;
13296 if (isLoad && dd != BITS4(1,1,1,1)) gate = False;
13297 // re-set not-used register values to invalid
13298 if (szBlg2 < BITS2(1,1)) tt2 = 16;
13299 if (isLoad) dd = 16;
13301 } else {
13302 /* ARM encoding. Do the load and store cases separately as
13303 the register numbers are in different places and a combined decode
13304 is too confusing. */
13305 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
13306 && INSN(11,0) == BITS12(1,1,1,0,1,0,0,1,1,1,1,1)) {
13307 szBlg2 = INSN(22,21);
13308 isLoad = True;
13309 nn = INSN(19,16);
13310 tt = INSN(15,12);
13311 gate = True;
13313 else
13314 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
13315 && INSN(11,4) == BITS8(1,1,1,0,1,0,0,1)) {
13316 szBlg2 = INSN(22,21);
13317 isLoad = False;
13318 nn = INSN(19,16);
13319 dd = INSN(15,12);
13320 tt = INSN(3,0);
13321 gate = True;
13323 if (gate) {
13324 // Rearrange szBlg2 bits to be the same as the Thumb case
13325 switch (szBlg2) {
13326 case 2: szBlg2 = 0; break;
13327 case 3: szBlg2 = 1; break;
13328 case 0: szBlg2 = 2; break;
13329 case 1: szBlg2 = 3; break;
13330 default: /*NOTREACHED*/vassert(0);
13334 // Perform further checks on register numbers
13335 if (gate) {
13336 /**/ if (isT && isLoad) {
13337 // Thumb load
13338 if (szBlg2 < 3) {
13339 if (! (tt != 13 && tt != 15 && nn != 15)) gate = False;
13340 } else {
13341 if (! (tt != 13 && tt != 15 && tt2 != 13 && tt2 != 15
13342 && tt != tt2 && nn != 15)) gate = False;
13345 else if (isT && !isLoad) {
13346 // Thumb store
13347 if (szBlg2 < 3) {
13348 if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
13349 && nn != 15 && dd != nn && dd != tt)) gate = False;
13350 } else {
13351 if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
13352 && tt2 != 13 && tt2 != 15 && nn != 15 && dd != nn
13353 && dd != tt && dd != tt2)) gate = False;
13356 else if (!isT && isLoad) {
13357 // ARM Load
13358 if (szBlg2 < 3) {
13359 if (! (tt != 15 && nn != 15)) gate = False;
13360 } else {
13361 if (! ((tt & 1) == 0 && tt != 14 && nn != 15)) gate = False;
13362 vassert(tt2 == 16/*invalid*/);
13363 tt2 = tt + 1;
13366 else if (!isT && !isLoad) {
13367 // ARM Store
13368 if (szBlg2 < 3) {
13369 if (! (dd != 15 && tt != 15 && nn != 15
13370 && dd != nn && dd != tt)) gate = False;
13371 } else {
13372 if (! (dd != 15 && (tt & 1) == 0 && tt != 14 && nn != 15
13373 && dd != nn && dd != tt && dd != tt+1)) gate = False;
13374 vassert(tt2 == 16/*invalid*/);
13375 tt2 = tt + 1;
13378 else /*NOTREACHED*/vassert(0);
13380 if (gate) {
13381 // Paranoia ..
13382 vassert(szBlg2 <= 3);
13383 if (szBlg2 < 3) { vassert(tt2 == 16/*invalid*/); }
13384 else { vassert(tt2 <= 14); }
13385 if (isLoad) { vassert(dd == 16/*invalid*/); }
13386 else { vassert(dd <= 14); }
13388 // If we're still good even after all that, generate the IR.
13389 if (gate) {
13390 /* First, go unconditional. Staying in-line is too complex. */
13391 if (isT) {
13392 vassert(condT != IRTemp_INVALID);
13393 mk_skip_over_T32_if_cond_is_false( condT );
13394 } else {
13395 if (condT != IRTemp_INVALID) {
13396 mk_skip_over_A32_if_cond_is_false( condT );
13397 condT = IRTemp_INVALID;
13400 /* Now the load or store. */
13401 IRType ty = Ity_INVALID; /* the type of the transferred data */
13402 const HChar* nm = NULL;
13403 switch (szBlg2) {
13404 case 0: nm = "b"; ty = Ity_I8; break;
13405 case 1: nm = "h"; ty = Ity_I16; break;
13406 case 2: nm = ""; ty = Ity_I32; break;
13407 case 3: nm = "d"; ty = Ity_I64; break;
13408 default: vassert(0);
13410 IRExpr* ea = isT ? getIRegT(nn) : getIRegA(nn);
13411 if (isLoad) {
13412 // LOAD. Transaction, then fence.
13413 IROp widen = Iop_INVALID;
13414 switch (szBlg2) {
13415 case 0: widen = Iop_8Uto32; break;
13416 case 1: widen = Iop_16Uto32; break;
13417 case 2: case 3: break;
13418 default: vassert(0);
13420 IRTemp res = newTemp(ty);
13421 // FIXME: assumes little-endian guest
13422 stmt( IRStmt_LLSC(Iend_LE, res, ea, NULL/*this is a load*/) );
13424 # define PUT_IREG(_nnz, _eez) \
13425 do { vassert((_nnz) <= 14); /* no writes to the PC */ \
13426 if (isT) { putIRegT((_nnz), (_eez), IRTemp_INVALID); } \
13427 else { putIRegA((_nnz), (_eez), \
13428 IRTemp_INVALID, Ijk_Boring); } } while(0)
13429 if (ty == Ity_I64) {
13430 // FIXME: assumes little-endian guest
13431 PUT_IREG(tt, unop(Iop_64to32, mkexpr(res)));
13432 PUT_IREG(tt2, unop(Iop_64HIto32, mkexpr(res)));
13433 } else {
13434 PUT_IREG(tt, widen == Iop_INVALID
13435 ? mkexpr(res) : unop(widen, mkexpr(res)));
13437 stmt(IRStmt_MBE(Imbe_Fence));
13438 if (ty == Ity_I64) {
13439 DIP("ldrex%s%s r%u, r%u, [r%u]\n",
13440 nm, isT ? "" : nCC(conq), tt, tt2, nn);
13441 } else {
13442 DIP("ldrex%s%s r%u, [r%u]\n", nm, isT ? "" : nCC(conq), tt, nn);
13444 # undef PUT_IREG
13445 } else {
13446 // STORE. Fence, then transaction.
13447 IRTemp resSC1, resSC32, data;
13448 IROp narrow = Iop_INVALID;
13449 switch (szBlg2) {
13450 case 0: narrow = Iop_32to8; break;
13451 case 1: narrow = Iop_32to16; break;
13452 case 2: case 3: break;
13453 default: vassert(0);
13455 stmt(IRStmt_MBE(Imbe_Fence));
13456 data = newTemp(ty);
13457 # define GET_IREG(_nnz) (isT ? getIRegT(_nnz) : getIRegA(_nnz))
13458 assign(data,
13459 ty == Ity_I64
13460 // FIXME: assumes little-endian guest
13461 ? binop(Iop_32HLto64, GET_IREG(tt2), GET_IREG(tt))
13462 : narrow == Iop_INVALID
13463 ? GET_IREG(tt)
13464 : unop(narrow, GET_IREG(tt)));
13465 # undef GET_IREG
13466 resSC1 = newTemp(Ity_I1);
13467 // FIXME: assumes little-endian guest
13468 stmt( IRStmt_LLSC(Iend_LE, resSC1, ea, mkexpr(data)) );
13470 /* Set rDD to 1 on failure, 0 on success. Currently we have
13471 resSC1 == 0 on failure, 1 on success. */
13472 resSC32 = newTemp(Ity_I32);
13473 assign(resSC32,
13474 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
13475 vassert(dd <= 14); /* no writes to the PC */
13476 if (isT) {
13477 putIRegT(dd, mkexpr(resSC32), IRTemp_INVALID);
13478 } else {
13479 putIRegA(dd, mkexpr(resSC32), IRTemp_INVALID, Ijk_Boring);
13481 if (ty == Ity_I64) {
13482 DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
13483 nm, isT ? "" : nCC(conq), dd, tt, tt2, nn);
13484 } else {
13485 DIP("strex%s%s r%u, r%u, [r%u]\n",
13486 nm, isT ? "" : nCC(conq), dd, tt, nn);
13488 } /* if (isLoad) */
13489 return True;
13490 } /* if (gate) */
13491 /* else fall through */
13494 /* ----------- VSEL<c>.F64 d_d_d, VSEL<c>.F32 s_s_s ----------- */
13495 /* 31 27 22 21 19 15 11 8 7 6 5 4 3
13496 T1/A1: 1111 11100 D cc n d 101 1 N 0 M 0 m VSEL<c>.F64 Dd, Dn, Dm
13497 T1/A1: 1111 11100 D cc n d 101 0 N 0 M 0 m VSEL<c>.F32 Sd, Sn, Sm
13499 ARM encoding is in NV space.
13500 In Thumb mode, we must not be in an IT block.
13502 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,0) && INSN(11,9) == BITS3(1,0,1)
13503 && INSN(6,6) == 0 && INSN(4,4) == 0) {
13504 UInt bit_D = INSN(22,22);
13505 UInt fld_cc = INSN(21,20);
13506 UInt fld_n = INSN(19,16);
13507 UInt fld_d = INSN(15,12);
13508 Bool isF64 = INSN(8,8) == 1;
13509 UInt bit_N = INSN(7,7);
13510 UInt bit_M = INSN(5,5);
13511 UInt fld_m = INSN(3,0);
13513 UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D);
13514 UInt nn = isF64 ? ((bit_N << 4) | fld_n) : ((fld_n << 1) | bit_N);
13515 UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
13517 UInt cc_1 = (fld_cc >> 1) & 1;
13518 UInt cc_0 = (fld_cc >> 0) & 1;
13519 UInt cond = (fld_cc << 2) | ((cc_1 ^ cc_0) << 1) | 0;
13521 if (isT) {
13522 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13524 /* In ARM mode, this is statically unconditional. In Thumb mode,
13525 this must be dynamically unconditional, and we've SIGILLd if not.
13526 In either case we can create unconditional IR. */
13528 IRTemp guard = newTemp(Ity_I32);
13529 assign(guard, mk_armg_calculate_condition(cond));
13530 IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
13531 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13532 IRExpr* res = IRExpr_ITE(unop(Iop_32to1, mkexpr(guard)), srcN, srcM);
13533 (isF64 ? llPutDReg : llPutFReg)(dd, res);
13535 UChar rch = isF64 ? 'd' : 'f';
13536 DIP("vsel%s.%s %c%u, %c%u, %c%u\n",
13537 nCC(cond), isF64 ? "f64" : "f32", rch, dd, rch, nn, rch, mm);
13538 return True;
13541 /* -------- VRINT{A,N,P,M}.F64 d_d, VRINT{A,N,P,M}.F32 s_s -------- */
13542 /* 31 22 21 17 15 11 8 7 5 4 3
13543 T1/A1: 111111101 D 1110 rm Vd 101 1 01 M 0 Vm VRINT{A,N,P,M}.F64 Dd, Dm
13544 T1/A1: 111111101 D 1110 rm Vd 101 0 01 M 0 Vm VRINT{A,N,P,M}.F32 Sd, Sm
13546 ARM encoding is in NV space.
13547 In Thumb mode, we must not be in an IT block.
13549 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1)
13550 && INSN(21,18) == BITS4(1,1,1,0) && INSN(11,9) == BITS3(1,0,1)
13551 && INSN(7,6) == BITS2(0,1) && INSN(4,4) == 0) {
13552 UInt bit_D = INSN(22,22);
13553 UInt fld_rm = INSN(17,16);
13554 UInt fld_d = INSN(15,12);
13555 Bool isF64 = INSN(8,8) == 1;
13556 UInt bit_M = INSN(5,5);
13557 UInt fld_m = INSN(3,0);
13559 UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D);
13560 UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
13562 if (isT) {
13563 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13565 /* In ARM mode, this is statically unconditional. In Thumb mode,
13566 this must be dynamically unconditional, and we've SIGILLd if not.
13567 In either case we can create unconditional IR. */
13569 UChar c = '?';
13570 IRRoundingMode rm = Irrm_NEAREST;
13571 switch (fld_rm) {
13572 /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13573 kludge since it doesn't take into account the nearest-even vs
13574 nearest-away semantics. */
13575 case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
13576 case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
13577 case BITS2(1,0): c = 'p'; rm = Irrm_PosINF; break;
13578 case BITS2(1,1): c = 'm'; rm = Irrm_NegINF; break;
13579 default: vassert(0);
13582 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13583 IRExpr* res = binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13584 mkU32((UInt)rm), srcM);
13585 (isF64 ? llPutDReg : llPutFReg)(dd, res);
13587 UChar rch = isF64 ? 'd' : 'f';
13588 DIP("vrint%c.%s.%s %c%u, %c%u\n",
13589 c, isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13590 return True;
13593 /* -------- VRINT{Z,R}.F64.F64 d_d, VRINT{Z,R}.F32.F32 s_s -------- */
13594 /* 31 27 22 21 15 11 7 6 5 4 3
13595 T1: 1110 11101 D 110110 Vd 1011 op 1 M 0 Vm VRINT<r><c>.F64.F64 Dd, Dm
13596 A1: cond 11101 D 110110 Vd 1011 op 1 M 0 Vm
13598 T1: 1110 11101 D 110110 Vd 1010 op 1 M 0 Vm VRINT<r><c>.F32.F32 Sd, Sm
13599 A1: cond 11101 D 110110 Vd 1010 op 1 M 0 Vm
13601 In contrast to the VRINT variants just above, this can be conditional.
13603 if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
13604 && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,0)
13605 && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) {
13606 UInt bit_D = INSN(22,22);
13607 UInt fld_Vd = INSN(15,12);
13608 Bool isF64 = INSN(8,8) == 1;
13609 Bool rToZero = INSN(7,7) == 1;
13610 UInt bit_M = INSN(5,5);
13611 UInt fld_Vm = INSN(3,0);
13612 UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13613 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13615 if (isT) vassert(condT != IRTemp_INVALID);
13616 IRType ty = isF64 ? Ity_F64 : Ity_F32;
13617 IRTemp src = newTemp(ty);
13618 IRTemp res = newTemp(ty);
13619 assign(src, (isF64 ? getDReg : getFReg)(mm));
13621 IRTemp rm = newTemp(Ity_I32);
13622 assign(rm, rToZero ? mkU32(Irrm_ZERO)
13623 : mkexpr(mk_get_IR_rounding_mode()));
13624 assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13625 mkexpr(rm), mkexpr(src)));
13626 (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT);
13628 UChar rch = isF64 ? 'd' : 'f';
13629 DIP("vrint%c.%s.%s %c%u, %c%u\n",
13630 rToZero ? 'z' : 'r',
13631 isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13632 return True;
13635 /* ----------- VCVT{A,N,P,M}{.S32,.U32}{.F64,.F32} ----------- */
13636 /* 31 27 22 21 17 15 11 8 7 6 5 4 3
13637 T1/A1: 1111 11101 D 1111 rm Vd 101 sz op 1 M 0 Vm
13638 VCVT{A,N,P,M}{.S32,.U32}.F64 Sd, Dm
13639 VCVT{A,N,P,M}{.S32,.U32}.F32 Sd, Sm
13641 ARM encoding is in NV space.
13642 In Thumb mode, we must not be in an IT block.
13644 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,18) == BITS4(1,1,1,1)
13645 && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) {
13646 UInt bit_D = INSN(22,22);
13647 UInt fld_rm = INSN(17,16);
13648 UInt fld_Vd = INSN(15,12);
13649 Bool isF64 = INSN(8,8) == 1;
13650 Bool isU = INSN(7,7) == 0;
13651 UInt bit_M = INSN(5,5);
13652 UInt fld_Vm = INSN(3,0);
13654 UInt dd = (fld_Vd << 1) | bit_D;
13655 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13657 if (isT) {
13658 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13660 /* In ARM mode, this is statically unconditional. In Thumb mode,
13661 this must be dynamically unconditional, and we've SIGILLd if not.
13662 In either case we can create unconditional IR. */
13664 UChar c = '?';
13665 IRRoundingMode rm = Irrm_NEAREST;
13666 switch (fld_rm) {
13667 /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13668 kludge since it doesn't take into account the nearest-even vs
13669 nearest-away semantics. */
13670 case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
13671 case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
13672 case BITS2(1,0): c = 'p'; rm = Irrm_PosINF; break;
13673 case BITS2(1,1): c = 'm'; rm = Irrm_NegINF; break;
13674 default: vassert(0);
13677 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13678 IRTemp res = newTemp(Ity_I32);
13680 /* The arm back end doesn't support use of Iop_F32toI32U or
13681 Iop_F32toI32S, so for those cases we widen the F32 to F64
13682 and then follow the F64 route. */
13683 if (!isF64) {
13684 srcM = unop(Iop_F32toF64, srcM);
13686 assign(res, binop(isU ? Iop_F64toI32U : Iop_F64toI32S,
13687 mkU32((UInt)rm), srcM));
13689 llPutFReg(dd, unop(Iop_ReinterpI32asF32, mkexpr(res)));
13691 UChar rch = isF64 ? 'd' : 'f';
13692 DIP("vcvt%c.%s.%s %c%u, %c%u\n",
13693 c, isU ? "u32" : "s32", isF64 ? "f64" : "f32", 's', dd, rch, mm);
13694 return True;
13697 /* ----------- V{MAX,MIN}NM{.F64 d_d_d, .F32 s_s_s} ----------- */
13698 /* 31 27 22 21 19 15 11 8 7 6 5 4 3
13699 1111 11101 D 00 Vn Vd 101 1 N op M 0 Vm V{MIN,MAX}NM.F64 Dd, Dn, Dm
13700 1111 11101 D 00 Vn Vd 101 0 N op M 0 Vm V{MIN,MAX}NM.F32 Sd, Sn, Sm
13702 ARM encoding is in NV space.
13703 In Thumb mode, we must not be in an IT block.
13705 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,20) == BITS2(0,0)
13706 && INSN(11,9) == BITS3(1,0,1) && INSN(4,4) == 0) {
13707 UInt bit_D = INSN(22,22);
13708 UInt fld_Vn = INSN(19,16);
13709 UInt fld_Vd = INSN(15,12);
13710 Bool isF64 = INSN(8,8) == 1;
13711 UInt bit_N = INSN(7,7);
13712 Bool isMAX = INSN(6,6) == 0;
13713 UInt bit_M = INSN(5,5);
13714 UInt fld_Vm = INSN(3,0);
13716 UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13717 UInt nn = isF64 ? ((bit_N << 4) | fld_Vn) : ((fld_Vn << 1) | bit_N);
13718 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13720 if (isT) {
13721 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13723 /* In ARM mode, this is statically unconditional. In Thumb mode,
13724 this must be dynamically unconditional, and we've SIGILLd if not.
13725 In either case we can create unconditional IR. */
13727 IROp op = isF64 ? (isMAX ? Iop_MaxNumF64 : Iop_MinNumF64)
13728 : (isMAX ? Iop_MaxNumF32 : Iop_MinNumF32);
13729 IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
13730 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13731 IRExpr* res = binop(op, srcN, srcM);
13732 (isF64 ? llPutDReg : llPutFReg)(dd, res);
13734 UChar rch = isF64 ? 'd' : 'f';
13735 DIP("v%snm.%s %c%u, %c%u, %c%u\n",
13736 isMAX ? "max" : "min", isF64 ? "f64" : "f32",
13737 rch, dd, rch, nn, rch, mm);
13738 return True;
13741 /* ----------- VRINTX.F64.F64 d_d, VRINTX.F32.F32 s_s ----------- */
13742 /* 31 27 22 21 15 11 8 7 5 4 3
13743 T1: 1110 11101 D 110111 Vd 101 1 01 M 0 Vm VRINTX<c>.F64.F64 Dd, Dm
13744 A1: cond 11101 D 110111 Vd 101 1 01 M 0 Vm
13746 T1: 1110 11101 D 110111 Vd 101 0 01 M 0 Vm VRINTX<c>.F32.F32 Dd, Dm
13747 A1: cond 11101 D 110111 Vd 101 0 01 M 0 Vm
13749 Like VRINT{Z,R}{.F64.F64, .F32.F32} just above, this can be conditional.
13750 This produces the same code as the VRINTR case since we ignore the
13751 requirement to signal inexactness.
13753 if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
13754 && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,1)
13755 && INSN(11,9) == BITS3(1,0,1) && INSN(7,6) == BITS2(0,1)
13756 && INSN(4,4) == 0) {
13757 UInt bit_D = INSN(22,22);
13758 UInt fld_Vd = INSN(15,12);
13759 Bool isF64 = INSN(8,8) == 1;
13760 UInt bit_M = INSN(5,5);
13761 UInt fld_Vm = INSN(3,0);
13762 UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13763 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13765 if (isT) vassert(condT != IRTemp_INVALID);
13766 IRType ty = isF64 ? Ity_F64 : Ity_F32;
13767 IRTemp src = newTemp(ty);
13768 IRTemp res = newTemp(ty);
13769 assign(src, (isF64 ? getDReg : getFReg)(mm));
13771 IRTemp rm = newTemp(Ity_I32);
13772 assign(rm, mkexpr(mk_get_IR_rounding_mode()));
13773 assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13774 mkexpr(rm), mkexpr(src)));
13775 (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT);
13777 UChar rch = isF64 ? 'd' : 'f';
13778 DIP("vrint%c.%s.%s %c%u, %c%u\n",
13779 'x',
13780 isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13781 return True;
13784 /* ----------- V{MAX,MIN}NM{.F32 d_d_d, .F32 q_q_q} ----------- */
13785 /* 31 27 22 21 20 19 15 11 7 6 5 4 3
13786 T1: 1111 11110 D op 0 Vn Vd 1111 N 1 M 1 Vm V{MIN,MAX}NM.F32 Qd,Qn,Qm
13787 A1: 1111 00110 D op 0 Vn Vd 1111 N 1 M 1 Vm
13789 T1: 1111 11110 D op 0 Vn Vd 1111 N 0 M 1 Vm V{MIN,MAX}NM.F32 Dd,Dn,Dm
13790 A1: 1111 00110 D op 0 Vn Vd 1111 N 0 M 1 Vm
13792 ARM encoding is in NV space.
13793 In Thumb mode, we must not be in an IT block.
13795 if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,0)
13796 : BITS9(1,1,1,1,0,0,1,1,0))
13797 && INSN(20,20) == 0 && INSN(11,8) == BITS4(1,1,1,1) && INSN(4,4) == 1) {
13798 UInt bit_D = INSN(22,22);
13799 Bool isMax = INSN(21,21) == 0;
13800 UInt fld_Vn = INSN(19,16);
13801 UInt fld_Vd = INSN(15,12);
13802 UInt bit_N = INSN(7,7);
13803 Bool isQ = INSN(6,6) == 1;
13804 UInt bit_M = INSN(5,5);
13805 UInt fld_Vm = INSN(3,0);
13807 /* dd, nn, mm are D-register numbers. */
13808 UInt dd = (bit_D << 4) | fld_Vd;
13809 UInt nn = (bit_N << 4) | fld_Vn;
13810 UInt mm = (bit_M << 4) | fld_Vm;
13812 if (! (isQ && ((dd & 1) == 1 || (nn & 1) == 1 || (mm & 1) == 1))) {
13813 /* Do this piecewise on f regs. This is a bit tricky
13814 though because we are dealing with the full 16 x Q == 32 x D
13815 register set, so the implied F reg numbers are 0 to 63. But
13816 ll{Get,Put}FReg only allow the 0 .. 31 as those are the only
13817 architected F regs. */
13818 UInt ddF = dd << 1;
13819 UInt nnF = nn << 1;
13820 UInt mmF = mm << 1;
13822 if (isT) {
13823 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13825 /* In ARM mode, this is statically unconditional. In Thumb mode,
13826 this must be dynamically unconditional, and we've SIGILLd if not.
13827 In either case we can create unconditional IR. */
13829 IROp op = isMax ? Iop_MaxNumF32 : Iop_MinNumF32;
13831 IRTemp r0 = newTemp(Ity_F32);
13832 IRTemp r1 = newTemp(Ity_F32);
13833 IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13834 IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13836 assign(r0, binop(op, llGetFReg_up_to_64(nnF+0),
13837 llGetFReg_up_to_64(mmF+0)));
13838 assign(r1, binop(op, llGetFReg_up_to_64(nnF+1),
13839 llGetFReg_up_to_64(mmF+1)));
13840 if (isQ) {
13841 assign(r2, binop(op, llGetFReg_up_to_64(nnF+2),
13842 llGetFReg_up_to_64(mmF+2)));
13843 assign(r3, binop(op, llGetFReg_up_to_64(nnF+3),
13844 llGetFReg_up_to_64(mmF+3)));
13846 llPutFReg_up_to_64(ddF+0, mkexpr(r0));
13847 llPutFReg_up_to_64(ddF+1, mkexpr(r1));
13848 if (isQ) {
13849 llPutFReg_up_to_64(ddF+2, mkexpr(r2));
13850 llPutFReg_up_to_64(ddF+3, mkexpr(r3));
13853 HChar rch = isQ ? 'q' : 'd';
13854 UInt sh = isQ ? 1 : 0;
13855 DIP("v%snm.f32 %c%u, %c%u, %c%u\n",
13856 isMax ? "max" : "min", rch,
13857 dd >> sh, rch, nn >> sh, rch, mm >> sh);
13858 return True;
13860 /* else fall through */
13863 /* ----------- VCVT{A,N,P,M}{.F32 d_d, .F32 q_q} ----------- */
13864 /* 31 27 22 21 15 11 9 7 6 5 4 3
13865 T1: 1111 11111 D 111011 Vd 00 rm op Q M 0 Vm
13866 A1: 1111 00111 D 111011 Vd 00 rm op Q M 0 Vm
13868 ARM encoding is in NV space.
13869 In Thumb mode, we must not be in an IT block.
13871 if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
13872 : BITS9(1,1,1,1,0,0,1,1,1))
13873 && INSN(21,16) == BITS6(1,1,1,0,1,1) && INSN(11,10) == BITS2(0,0)
13874 && INSN(4,4) == 0) {
13875 UInt bit_D = INSN(22,22);
13876 UInt fld_Vd = INSN(15,12);
13877 UInt fld_rm = INSN(9,8);
13878 Bool isU = INSN(7,7) == 1;
13879 Bool isQ = INSN(6,6) == 1;
13880 UInt bit_M = INSN(5,5);
13881 UInt fld_Vm = INSN(3,0);
13883 /* dd, nn, mm are D-register numbers. */
13884 UInt dd = (bit_D << 4) | fld_Vd;
13885 UInt mm = (bit_M << 4) | fld_Vm;
13887 if (! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
13888 /* Do this piecewise on f regs. */
13889 UInt ddF = dd << 1;
13890 UInt mmF = mm << 1;
13892 if (isT) {
13893 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13895 /* In ARM mode, this is statically unconditional. In Thumb mode,
13896 this must be dynamically unconditional, and we've SIGILLd if not.
13897 In either case we can create unconditional IR. */
13899 UChar cvtc = '?';
13900 IRRoundingMode rm = Irrm_NEAREST;
13901 switch (fld_rm) {
13902 /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13903 kludge since it doesn't take into account the nearest-even vs
13904 nearest-away semantics. */
13905 case BITS2(0,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
13906 case BITS2(0,1): cvtc = 'n'; rm = Irrm_NEAREST; break;
13907 case BITS2(1,0): cvtc = 'p'; rm = Irrm_PosINF; break;
13908 case BITS2(1,1): cvtc = 'm'; rm = Irrm_NegINF; break;
13909 default: vassert(0);
13912 IROp cvt = isU ? Iop_F64toI32U : Iop_F64toI32S;
13914 IRTemp r0 = newTemp(Ity_F32);
13915 IRTemp r1 = newTemp(Ity_F32);
13916 IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13917 IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13919 IRExpr* rmE = mkU32((UInt)rm);
13921 assign(r0, unop(Iop_ReinterpI32asF32,
13922 binop(cvt, rmE, unop(Iop_F32toF64,
13923 llGetFReg_up_to_64(mmF+0)))));
13924 assign(r1, unop(Iop_ReinterpI32asF32,
13925 binop(cvt, rmE, unop(Iop_F32toF64,
13926 llGetFReg_up_to_64(mmF+1)))));
13927 if (isQ) {
13928 assign(r2, unop(Iop_ReinterpI32asF32,
13929 binop(cvt, rmE, unop(Iop_F32toF64,
13930 llGetFReg_up_to_64(mmF+2)))));
13931 assign(r3, unop(Iop_ReinterpI32asF32,
13932 binop(cvt, rmE, unop(Iop_F32toF64,
13933 llGetFReg_up_to_64(mmF+3)))));
13936 llPutFReg_up_to_64(ddF+0, mkexpr(r0));
13937 llPutFReg_up_to_64(ddF+1, mkexpr(r1));
13938 if (isQ) {
13939 llPutFReg_up_to_64(ddF+2, mkexpr(r2));
13940 llPutFReg_up_to_64(ddF+3, mkexpr(r3));
13943 HChar rch = isQ ? 'q' : 'd';
13944 UInt sh = isQ ? 1 : 0;
13945 DIP("vcvt%c.%c32.f32 %c%u, %c%u\n",
13946 cvtc, isU ? 'u' : 's', rch, dd >> sh, rch, mm >> sh);
13947 return True;
13949 /* else fall through */
13952 /* ----------- VRINT{A,N,P,M,X,Z}{.F32 d_d, .F32 q_q} ----------- */
13953 /* 31 27 22 21 15 11 9 6 5 4 3
13954 T1: 1111 11111 D 111010 Vd 01 op Q M 0 Vm
13955 A1: 1111 00111 D 111010 Vd 01 op Q M 0 Vm
13957 ARM encoding is in NV space.
13958 In Thumb mode, we must not be in an IT block.
13960 if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
13961 : BITS9(1,1,1,1,0,0,1,1,1))
13962 && INSN(21,16) == BITS6(1,1,1,0,1,0) && INSN(11,10) == BITS2(0,1)
13963 && INSN(4,4) == 0) {
13964 UInt bit_D = INSN(22,22);
13965 UInt fld_Vd = INSN(15,12);
13966 UInt fld_op = INSN(9,7);
13967 Bool isQ = INSN(6,6) == 1;
13968 UInt bit_M = INSN(5,5);
13969 UInt fld_Vm = INSN(3,0);
13971 /* dd, nn, mm are D-register numbers. */
13972 UInt dd = (bit_D << 4) | fld_Vd;
13973 UInt mm = (bit_M << 4) | fld_Vm;
13975 if (! (fld_op == BITS3(1,0,0) || fld_op == BITS3(1,1,0))
13976 && ! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
13977 /* Do this piecewise on f regs. */
13978 UInt ddF = dd << 1;
13979 UInt mmF = mm << 1;
13981 if (isT) {
13982 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13984 /* In ARM mode, this is statically unconditional. In Thumb mode,
13985 this must be dynamically unconditional, and we've SIGILLd if not.
13986 In either case we can create unconditional IR. */
13988 UChar cvtc = '?';
13989 IRRoundingMode rm = Irrm_NEAREST;
13990 switch (fld_op) {
13991 /* Various kludges:
13992 - The use of NEAREST for both the 'a' and 'n' cases,
13993 since it doesn't take into account the nearest-even vs
13994 nearest-away semantics.
13995 - For the 'x' case, we don't signal inexactness.
13997 case BITS3(0,1,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
13998 case BITS3(0,0,0): cvtc = 'n'; rm = Irrm_NEAREST; break;
13999 case BITS3(1,1,1): cvtc = 'p'; rm = Irrm_PosINF; break;
14000 case BITS3(1,0,1): cvtc = 'm'; rm = Irrm_NegINF; break;
14001 case BITS3(0,1,1): cvtc = 'z'; rm = Irrm_ZERO; break;
14002 case BITS3(0,0,1): cvtc = 'x'; rm = Irrm_NEAREST; break;
14003 case BITS3(1,0,0):
14004 case BITS3(1,1,0):
14005 default: vassert(0);
14008 IRTemp r0 = newTemp(Ity_F32);
14009 IRTemp r1 = newTemp(Ity_F32);
14010 IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
14011 IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
14013 IRExpr* rmE = mkU32((UInt)rm);
14014 IROp rnd = Iop_RoundF32toInt;
14016 assign(r0, binop(rnd, rmE, llGetFReg_up_to_64(mmF+0)));
14017 assign(r1, binop(rnd, rmE, llGetFReg_up_to_64(mmF+1)));
14018 if (isQ) {
14019 assign(r2, binop(rnd, rmE, llGetFReg_up_to_64(mmF+2)));
14020 assign(r3, binop(rnd, rmE, llGetFReg_up_to_64(mmF+3)));
14023 llPutFReg_up_to_64(ddF+0, mkexpr(r0));
14024 llPutFReg_up_to_64(ddF+1, mkexpr(r1));
14025 if (isQ) {
14026 llPutFReg_up_to_64(ddF+2, mkexpr(r2));
14027 llPutFReg_up_to_64(ddF+3, mkexpr(r3));
14030 HChar rch = isQ ? 'q' : 'd';
14031 UInt sh = isQ ? 1 : 0;
14032 DIP("vrint%c.f32.f32 %c%u, %c%u\n",
14033 cvtc, rch, dd >> sh, rch, mm >> sh);
14034 return True;
14036 /* else fall through */
14039 /* ---------- Doesn't match anything. ---------- */
14040 return False;
14042 # undef INSN
14046 /*------------------------------------------------------------*/
14047 /*--- LDMxx/STMxx helper (both ARM and Thumb32) ---*/
14048 /*------------------------------------------------------------*/
14050 /* Generate IR for LDMxx and STMxx. This is complex. Assumes it's
14051 unconditional, so the caller must produce a jump-around before
14052 calling this, if the insn is to be conditional. Caller is
14053 responsible for all validation of parameters. For LDMxx, if PC is
14054 amongst the values loaded, caller is also responsible for
14055 generating the jump. */
14056 static void mk_ldm_stm ( Bool arm, /* True: ARM, False: Thumb */
14057 UInt rN, /* base reg */
14058 UInt bINC, /* 1: inc, 0: dec */
14059 UInt bBEFORE, /* 1: inc/dec before, 0: after */
14060 UInt bW, /* 1: writeback to Rn */
14061 UInt bL, /* 1: load, 0: store */
14062 UInt regList )
14064 Int i, r, m, nRegs;
/* Jump kind applied to loads; upgraded to Ijk_Ret below when the insn
   matches the stack-pop return idiom (LDMIA r13!, {..,pc}). */
14065 IRTemp jk = Ijk_Boring;
14067 /* Get hold of the old Rn value. We might need to write its value
14068 to memory during a store, and if it's also the writeback
14069 register then we need to get its value now. We can't treat it
14070 exactly like the other registers we're going to transfer,
14071 because for xxMDA and xxMDB writeback forms, the generated IR
14072 updates Rn in the guest state before any transfers take place.
14073 We have to do this as per comments below, in order that if Rn is
14074 the stack pointer then it always has a value that is below or equal
14075 to any of the transfer addresses. Ick. */
14076 IRTemp oldRnT = newTemp(Ity_I32);
14077 assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
14079 IRTemp anchorT = newTemp(Ity_I32);
14080 /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
14081 ignore the bottom two bits of the address. However, Cortex-A8
14082 doesn't seem to care. Hence: */
14083 /* No .. don't force alignment .. */
14084 /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
14085 /* Instead, use the potentially misaligned address directly. */
14086 assign(anchorT, mkexpr(oldRnT))
14088 IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
14089 // bINC == 1: xxMIA, xxMIB
14090 // bINC == 0: xxMDA, xxMDB
14092 // For xxMDA and xxMDB, update Rn first if necessary. We have
14093 // to do this first so that, for the common idiom of the transfers
14094 // faulting because we're pushing stuff onto a stack and the stack
14095 // is growing down onto allocate-on-fault pages (as Valgrind simulates),
14096 // we need to have the SP up-to-date "covering" (pointing below) the
14097 // transfer area. For the same reason, if we are doing xxMIA or xxMIB,
14098 // do the transfer first, and then update rN afterwards.
/* nRegs := population count of regList (number of registers transferred). */
14099 nRegs = 0;
14100 for (i = 0; i < 16; i++) {
14101 if ((regList & (1 << i)) != 0)
14102 nRegs++;
/* Decrementing forms: write the updated Rn back before any transfer,
   per the stack-growth rationale in the comment above. */
14104 if (bW == 1 && !bINC) {
14105 IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
14106 if (arm)
14107 putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
14108 else
14109 putIRegT( rN, e, IRTemp_INVALID );
14112 // Make up a list of the registers to transfer, and their offsets
14113 // in memory relative to the anchor. If the base reg (Rn) is part
14114 // of the transfer, then do it last for a load and first for a store.
14115 UInt xReg[16], xOff[16];
14116 Int nX = 0;
14117 m = 0;
14118 for (i = 0; i < 16; i++) {
/* Walk the register list low-to-high for increment forms and
   high-to-low for decrement forms, so xOff ends up ascending. */
14119 r = bINC ? i : (15-i);
14120 if (0 == (regList & (1<<r)))
14121 continue;
14122 if (bBEFORE)
14123 m++;
14124 /* paranoia: check we aren't transferring the writeback
14125 register during a load. Should be assured by decode-point
14126 check above. */
14127 if (bW == 1 && bL == 1)
14128 vassert(r != rN);
14130 xOff[nX] = 4 * m;
14131 xReg[nX] = r;
14132 nX++;
14134 if (!bBEFORE)
14135 m++;
14137 vassert(m == nRegs);
14138 vassert(nX == nRegs);
14139 vassert(nX <= 16);
14141 if (bW == 0 && (regList & (1<<rN)) != 0) {
14142 /* Non-writeback, and basereg is to be transferred. Do its
14143 transfer last for a load and first for a store. Requires
14144 reordering xOff/xReg. */
14145 if (0) {
14146 vex_printf("\nREG_LIST_PRE: (rN=%u)\n", rN);
14147 for (i = 0; i < nX; i++)
14148 vex_printf("reg %u off %u\n", xReg[i], xOff[i]);
14149 vex_printf("\n");
/* Locate Rn's entry in the transfer list. */
14152 vassert(nX > 0);
14153 for (i = 0; i < nX; i++) {
14154 if (xReg[i] == rN)
14155 break;
14157 vassert(i < nX); /* else we didn't find it! */
14158 UInt tReg = xReg[i];
14159 UInt tOff = xOff[i];
14160 if (bL == 1) {
14161 /* load; make this transfer happen last */
14162 if (i < nX-1) {
/* Shift the trailing entries down one slot, then park Rn at the end. */
14163 for (m = i+1; m < nX; m++) {
14164 xReg[m-1] = xReg[m];
14165 xOff[m-1] = xOff[m];
14167 vassert(m == nX);
14168 xReg[m-1] = tReg;
14169 xOff[m-1] = tOff;
14171 } else {
14172 /* store; make this transfer happen first */
14173 if (i > 0) {
/* Shift the leading entries up one slot, then park Rn at the front. */
14174 for (m = i-1; m >= 0; m--) {
14175 xReg[m+1] = xReg[m];
14176 xOff[m+1] = xOff[m];
14178 vassert(m == -1);
14179 xReg[0] = tReg;
14180 xOff[0] = tOff;
14184 if (0) {
14185 vex_printf("REG_LIST_POST:\n");
14186 for (i = 0; i < nX; i++)
14187 vex_printf("reg %u off %u\n", xReg[i], xOff[i]);
14188 vex_printf("\n");
14192 /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
14193 register and PC in the register list is a return for purposes of branch
14194 prediction.
14195 The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
14196 to be counted in event 0x0E (Procedure return).*/
14197 if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
14198 jk = Ijk_Ret;
14201 /* Actually generate the transfers */
14202 for (i = 0; i < nX; i++) {
14203 r = xReg[i];
14204 if (bL == 1) {
14205 IRExpr* e = loadLE(Ity_I32,
14206 binop(opADDorSUB, mkexpr(anchorT),
14207 mkU32(xOff[i])));
14208 if (arm) {
14209 putIRegA( r, e, IRTemp_INVALID, jk );
14210 } else {
14211 // no: putIRegT( r, e, IRTemp_INVALID );
14212 // putIRegT refuses to write to R15. But that might happen.
14213 // Since this is uncond, and we need to be able to
14214 // write the PC, just use the low level put:
14215 llPutIReg( r, e );
14217 } else {
14218 /* if we're storing Rn, make sure we use the correct
14219 value, as per extensive comments above */
14220 storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
14221 r == rN ? mkexpr(oldRnT)
14222 : (arm ? getIRegA(r) : getIRegT(r) ) );
14226 // If we are doing xxMIA or xxMIB,
14227 // do the transfer first, and then update rN afterwards.
14228 if (bW == 1 && bINC) {
14229 IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
14230 if (arm)
14231 putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
14232 else
14233 putIRegT( rN, e, IRTemp_INVALID );
14238 /*------------------------------------------------------------*/
14239 /*--- VFP (CP 10 and 11) instructions ---*/
14240 /*------------------------------------------------------------*/
14242 /* Both ARM and Thumb */
14244 /* Translate a CP10 or CP11 instruction. If successful, returns
14245 True and *dres may or may not be updated. On failure, returns
14246 False and neither changes *dres nor creates any IR.
14248 The ARM and Thumb encodings are identical for the low 28 bits of
14249 the insn (yay!) and that's what the caller must supply, iow, imm28
14250 has the top 4 bits masked out. Caller is responsible for
14251 determining whether the masked-out bits are valid for a CP10/11
14252 insn. The rules for the top 4 bits are:
14254 ARM: 0000 to 1110 allowed, and this is the gating condition.
14255 1111 (NV) is not allowed.
14257 Thumb: must be 1110. The gating condition is taken from
14258 ITSTATE in the normal way.
14260 Conditionalisation:
14262 Caller must supply an IRTemp 'condT' holding the gating condition,
14263 or IRTemp_INVALID indicating the insn is always executed.
14265 Caller must also supply an ARMCondcode 'cond'. This is only used
14266 for debug printing, no other purpose. For ARM, this is simply the
14267 top 4 bits of the original instruction. For Thumb, the condition
14268 is not (really) known until run time, and so ARMCondAL should be
14269 passed, only so that printing of these instructions does not show
14270 any condition.
14272 Finally, the caller must indicate whether this occurs in ARM or
14273 Thumb code.
14275 static Bool decode_CP10_CP11_instruction (
14276 /*MOD*/DisResult* dres,
14277 UInt insn28,
14278 IRTemp condT,
14279 ARMCondcode conq,
14280 Bool isT
14283 # define INSN(_bMax,_bMin) SLICE_UInt(insn28, (_bMax), (_bMin))
14285 vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
14287 if (isT) {
14288 vassert(conq == ARMCondAL);
14289 } else {
14290 vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
14293 /* ----------------------------------------------------------- */
14294 /* -- VFP instructions -- double precision (mostly) -- */
14295 /* ----------------------------------------------------------- */
14297 /* --------------------- fldmx, fstmx --------------------- */
14299 31 27 23 19 15 11 7 0
14300 P U WL
14301 C4-100, C5-26 1 FSTMX cond 1100 1000 Rn Dd 1011 offset
14302 C4-100, C5-28 2 FSTMIAX cond 1100 1010 Rn Dd 1011 offset
14303 C4-100, C5-30 3 FSTMDBX cond 1101 0010 Rn Dd 1011 offset
14305 C4-42, C5-26 1 FLDMX cond 1100 1001 Rn Dd 1011 offset
14306 C4-42, C5-28 2 FLDMIAX cond 1100 1011 Rn Dd 1011 offset
14307 C4-42, C5-30 3 FLDMDBX cond 1101 0011 Rn Dd 1011 offset
14309 Regs transferred: Dd .. D(d + (offset-3)/2)
14310 offset must be odd, must not imply a reg > 15
14311 IA/DB: Rn is changed by (4 + 8 x # regs transferred)
14313 case coding:
14314 1 at-Rn (access at Rn)
14315 2 ia-Rn (access at Rn, then Rn += 4+8n)
14316 3 db-Rn (Rn -= 4+8n, then access at Rn)
14318 if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
14319 && INSN(11,8) == BITS4(1,0,1,1)) {
14320 UInt bP = (insn28 >> 24) & 1;
14321 UInt bU = (insn28 >> 23) & 1;
14322 UInt bW = (insn28 >> 21) & 1;
14323 UInt bL = (insn28 >> 20) & 1;
14324 UInt offset = (insn28 >> 0) & 0xFF;
14325 UInt rN = INSN(19,16);
14326 UInt dD = (INSN(22,22) << 4) | INSN(15,12);
14327 UInt nRegs = (offset - 1) / 2;
14328 UInt summary = 0;
14329 Int i;
14331 /**/ if (bP == 0 && bU == 1 && bW == 0) {
14332 summary = 1;
14334 else if (bP == 0 && bU == 1 && bW == 1) {
14335 summary = 2;
14337 else if (bP == 1 && bU == 0 && bW == 1) {
14338 summary = 3;
14340 else goto after_vfp_fldmx_fstmx;
14342 /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
14343 if (rN == 15 && (summary == 2 || summary == 3 || isT))
14344 goto after_vfp_fldmx_fstmx;
14346 /* offset must be odd, and specify at least one register */
14347 if (0 == (offset & 1) || offset < 3)
14348 goto after_vfp_fldmx_fstmx;
14350 /* can't transfer regs after D15 */
14351 if (dD + nRegs - 1 >= 32)
14352 goto after_vfp_fldmx_fstmx;
14354 /* Now, we can't do a conditional load or store, since that very
14355 likely will generate an exception. So we have to take a side
14356 exit at this point if the condition is false. */
14357 if (condT != IRTemp_INVALID) {
14358 if (isT)
14359 mk_skip_over_T32_if_cond_is_false( condT );
14360 else
14361 mk_skip_over_A32_if_cond_is_false( condT );
14362 condT = IRTemp_INVALID;
14364 /* Ok, now we're unconditional. Do the load or store. */
14366 /* get the old Rn value */
14367 IRTemp rnT = newTemp(Ity_I32);
14368 assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
14369 rN == 15));
14371 /* make a new value for Rn, post-insn */
14372 IRTemp rnTnew = IRTemp_INVALID;
14373 if (summary == 2 || summary == 3) {
14374 rnTnew = newTemp(Ity_I32);
14375 assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
14376 mkexpr(rnT),
14377 mkU32(4 + 8 * nRegs)));
14380 /* decide on the base transfer address */
14381 IRTemp taT = newTemp(Ity_I32);
14382 assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
14384 /* update Rn if necessary -- in case 3, we're moving it down, so
14385 update before any memory reference, in order to keep Memcheck
14386 and V's stack-extending logic (on linux) happy */
14387 if (summary == 3) {
14388 if (isT)
14389 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14390 else
14391 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14394 /* generate the transfers */
14395 for (i = 0; i < nRegs; i++) {
14396 IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
14397 if (bL) {
14398 putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
14399 } else {
14400 storeLE(addr, getDReg(dD + i));
14404 /* update Rn if necessary -- in case 2, we're moving it up, so
14405 update after any memory reference, in order to keep Memcheck
14406 and V's stack-extending logic (on linux) happy */
14407 if (summary == 2) {
14408 if (isT)
14409 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14410 else
14411 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14414 const HChar* nm = bL==1 ? "ld" : "st";
14415 switch (summary) {
14416 case 1: DIP("f%smx%s r%u, {d%u-d%u}\n",
14417 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14418 break;
14419 case 2: DIP("f%smiax%s r%u!, {d%u-d%u}\n",
14420 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14421 break;
14422 case 3: DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
14423 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14424 break;
14425 default: vassert(0);
14428 goto decode_success_vfp;
14429 /* FIXME alignment constraints? */
14432 after_vfp_fldmx_fstmx:
14434 /* --------------------- fldmd, fstmd --------------------- */
14436 31 27 23 19 15 11 7 0
14437 P U WL
14438 C4-96, C5-26 1 FSTMD cond 1100 1000 Rn Dd 1011 offset
14439 C4-96, C5-28 2 FSTMDIA cond 1100 1010 Rn Dd 1011 offset
14440 C4-96, C5-30 3 FSTMDDB cond 1101 0010 Rn Dd 1011 offset
14442 C4-38, C5-26 1 FLDMD cond 1100 1001 Rn Dd 1011 offset
14443 C4-38, C5-28 2 FLDMIAD cond 1100 1011 Rn Dd 1011 offset
14444 C4-38, C5-30 3 FLDMDBD cond 1101 0011 Rn Dd 1011 offset
14446 Regs transferred: Dd .. D(d + (offset-2)/2)
14447 offset must be even, must not imply a reg > 15
14448 IA/DB: Rn is changed by (8 x # regs transferred)
14450 case coding:
14451 1 at-Rn (access at Rn)
14452 2 ia-Rn (access at Rn, then Rn += 8n)
14453 3 db-Rn (Rn -= 8n, then access at Rn)
14455 if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
14456 && INSN(11,8) == BITS4(1,0,1,1)) {
14457 UInt bP = (insn28 >> 24) & 1;
14458 UInt bU = (insn28 >> 23) & 1;
14459 UInt bW = (insn28 >> 21) & 1;
14460 UInt bL = (insn28 >> 20) & 1;
14461 UInt offset = (insn28 >> 0) & 0xFF;
14462 UInt rN = INSN(19,16);
14463 UInt dD = (INSN(22,22) << 4) | INSN(15,12);
14464 UInt nRegs = offset / 2;
14465 UInt summary = 0;
14466 Int i;
14468 /**/ if (bP == 0 && bU == 1 && bW == 0) {
14469 summary = 1;
14471 else if (bP == 0 && bU == 1 && bW == 1) {
14472 summary = 2;
14474 else if (bP == 1 && bU == 0 && bW == 1) {
14475 summary = 3;
14477 else goto after_vfp_fldmd_fstmd;
14479 /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
14480 if (rN == 15 && (summary == 2 || summary == 3 || isT))
14481 goto after_vfp_fldmd_fstmd;
14483 /* offset must be even, and specify at least one register */
14484 if (1 == (offset & 1) || offset < 2)
14485 goto after_vfp_fldmd_fstmd;
14487 /* can't transfer regs after D15 */
14488 if (dD + nRegs - 1 >= 32)
14489 goto after_vfp_fldmd_fstmd;
14491 /* Now, we can't do a conditional load or store, since that very
14492 likely will generate an exception. So we have to take a side
14493 exit at this point if the condition is false. */
14494 if (condT != IRTemp_INVALID) {
14495 if (isT)
14496 mk_skip_over_T32_if_cond_is_false( condT );
14497 else
14498 mk_skip_over_A32_if_cond_is_false( condT );
14499 condT = IRTemp_INVALID;
14501 /* Ok, now we're unconditional. Do the load or store. */
14503 /* get the old Rn value */
14504 IRTemp rnT = newTemp(Ity_I32);
14505 assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
14506 rN == 15));
14508 /* make a new value for Rn, post-insn */
14509 IRTemp rnTnew = IRTemp_INVALID;
14510 if (summary == 2 || summary == 3) {
14511 rnTnew = newTemp(Ity_I32);
14512 assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
14513 mkexpr(rnT),
14514 mkU32(8 * nRegs)));
14517 /* decide on the base transfer address */
14518 IRTemp taT = newTemp(Ity_I32);
14519 assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
14521 /* update Rn if necessary -- in case 3, we're moving it down, so
14522 update before any memory reference, in order to keep Memcheck
14523 and V's stack-extending logic (on linux) happy */
14524 if (summary == 3) {
14525 if (isT)
14526 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14527 else
14528 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14531 /* generate the transfers */
14532 for (i = 0; i < nRegs; i++) {
14533 IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
14534 if (bL) {
14535 putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
14536 } else {
14537 storeLE(addr, getDReg(dD + i));
14541 /* update Rn if necessary -- in case 2, we're moving it up, so
14542 update after any memory reference, in order to keep Memcheck
14543 and V's stack-extending logic (on linux) happy */
14544 if (summary == 2) {
14545 if (isT)
14546 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14547 else
14548 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14551 const HChar* nm = bL==1 ? "ld" : "st";
14552 switch (summary) {
14553 case 1: DIP("f%smd%s r%u, {d%u-d%u}\n",
14554 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14555 break;
14556 case 2: DIP("f%smiad%s r%u!, {d%u-d%u}\n",
14557 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14558 break;
14559 case 3: DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
14560 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14561 break;
14562 default: vassert(0);
14565 goto decode_success_vfp;
14566 /* FIXME alignment constraints? */
14569 after_vfp_fldmd_fstmd:
14571 /* ------------------- fmrx, fmxr ------------------- */
14572 if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
14573 && BITS4(1,0,1,0) == INSN(11,8)
14574 && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
14575 UInt rD = INSN(15,12);
14576 UInt reg = INSN(19,16);
14577 if (reg == BITS4(0,0,0,1)) {
14578 if (rD == 15) {
14579 IRTemp nzcvT = newTemp(Ity_I32);
14580 /* When rD is 15, we are copying the top 4 bits of FPSCR
14581 into CPSR. That is, set the flags thunk to COPY and
14582 install FPSCR[31:28] as the value to copy. */
14583 assign(nzcvT, binop(Iop_And32,
14584 IRExpr_Get(OFFB_FPSCR, Ity_I32),
14585 mkU32(0xF0000000)));
14586 setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
14587 DIP("fmstat%s\n", nCC(conq));
14588 } else {
14589 /* Otherwise, merely transfer FPSCR to r0 .. r14. */
14590 IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
14591 if (isT)
14592 putIRegT(rD, e, condT);
14593 else
14594 putIRegA(rD, e, condT, Ijk_Boring);
14595 DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
14597 goto decode_success_vfp;
14599 /* fall through */
14602 if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
14603 && BITS4(1,0,1,0) == INSN(11,8)
14604 && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
14605 UInt rD = INSN(15,12);
14606 UInt reg = INSN(19,16);
14607 if (reg == BITS4(0,0,0,1)) {
14608 putMiscReg32(OFFB_FPSCR,
14609 isT ? getIRegT(rD) : getIRegA(rD), condT);
14610 DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
14611 goto decode_success_vfp;
14613 /* fall through */
14616 /* --------------------- vmov --------------------- */
14617 // VMOV dM, rD, rN
14618 if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
14619 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
14620 UInt rD = INSN(15,12); /* lo32 */
14621 UInt rN = INSN(19,16); /* hi32 */
14622 if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
14623 /* fall through */
14624 } else {
14625 putDReg(dM,
14626 unop(Iop_ReinterpI64asF64,
14627 binop(Iop_32HLto64,
14628 isT ? getIRegT(rN) : getIRegA(rN),
14629 isT ? getIRegT(rD) : getIRegA(rD))),
14630 condT);
14631 DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
14632 goto decode_success_vfp;
14634 /* fall through */
14637 // VMOV rD, rN, dM
14638 if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
14639 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
14640 UInt rD = INSN(15,12); /* lo32 */
14641 UInt rN = INSN(19,16); /* hi32 */
14642 if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
14643 || rD == rN) {
14644 /* fall through */
14645 } else {
14646 IRTemp i64 = newTemp(Ity_I64);
14647 assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
14648 IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
14649 IRExpr* lo32 = unop(Iop_64to32, mkexpr(i64));
14650 if (isT) {
14651 putIRegT(rN, hi32, condT);
14652 putIRegT(rD, lo32, condT);
14653 } else {
14654 putIRegA(rN, hi32, condT, Ijk_Boring);
14655 putIRegA(rD, lo32, condT, Ijk_Boring);
14657 DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
14658 goto decode_success_vfp;
14660 /* fall through */
14663 // VMOV sD, sD+1, rN, rM
14664 if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
14665 UInt sD = (INSN(3,0) << 1) | INSN(5,5);
14666 UInt rN = INSN(15,12);
14667 UInt rM = INSN(19,16);
14668 if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
14669 || sD == 31) {
14670 /* fall through */
14671 } else {
14672 putFReg(sD,
14673 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
14674 condT);
14675 putFReg(sD+1,
14676 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
14677 condT);
14678 DIP("vmov%s, s%u, s%u, r%u, r%u\n",
14679 nCC(conq), sD, sD + 1, rN, rM);
14680 goto decode_success_vfp;
14684 // VMOV rN, rM, sD, sD+1
14685 if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
14686 UInt sD = (INSN(3,0) << 1) | INSN(5,5);
14687 UInt rN = INSN(15,12);
14688 UInt rM = INSN(19,16);
14689 if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
14690 || sD == 31 || rN == rM) {
14691 /* fall through */
14692 } else {
14693 IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
14694 IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
14695 if (isT) {
14696 putIRegT(rN, res0, condT);
14697 putIRegT(rM, res1, condT);
14698 } else {
14699 putIRegA(rN, res0, condT, Ijk_Boring);
14700 putIRegA(rM, res1, condT, Ijk_Boring);
14702 DIP("vmov%s, r%u, r%u, s%u, s%u\n",
14703 nCC(conq), rN, rM, sD, sD + 1);
14704 goto decode_success_vfp;
14708 // VMOV rD[x], rT (ARM core register to scalar)
14709 if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
14710 UInt rD = (INSN(7,7) << 4) | INSN(19,16);
14711 UInt rT = INSN(15,12);
14712 UInt opc = (INSN(22,21) << 2) | INSN(6,5);
14713 UInt index;
14714 if (rT == 15 || (isT && rT == 13)) {
14715 /* fall through */
14716 } else {
14717 if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
14718 index = opc & 7;
14719 putDRegI64(rD, triop(Iop_SetElem8x8,
14720 getDRegI64(rD),
14721 mkU8(index),
14722 unop(Iop_32to8,
14723 isT ? getIRegT(rT) : getIRegA(rT))),
14724 condT);
14725 DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14726 goto decode_success_vfp;
14728 else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
14729 index = (opc >> 1) & 3;
14730 putDRegI64(rD, triop(Iop_SetElem16x4,
14731 getDRegI64(rD),
14732 mkU8(index),
14733 unop(Iop_32to16,
14734 isT ? getIRegT(rT) : getIRegA(rT))),
14735 condT);
14736 DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14737 goto decode_success_vfp;
14739 else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
14740 index = (opc >> 2) & 1;
14741 putDRegI64(rD, triop(Iop_SetElem32x2,
14742 getDRegI64(rD),
14743 mkU8(index),
14744 isT ? getIRegT(rT) : getIRegA(rT)),
14745 condT);
14746 DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14747 goto decode_success_vfp;
14748 } else {
14749 /* fall through */
14754 // VMOV (scalar to ARM core register)
14755 // VMOV rT, rD[x]
14756 if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
14757 UInt rN = (INSN(7,7) << 4) | INSN(19,16);
14758 UInt rT = INSN(15,12);
14759 UInt U = INSN(23,23);
14760 UInt opc = (INSN(22,21) << 2) | INSN(6,5);
14761 UInt index;
14762 if (rT == 15 || (isT && rT == 13)) {
14763 /* fall through */
14764 } else {
14765 if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
14766 index = opc & 7;
14767 IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
14768 binop(Iop_GetElem8x8,
14769 getDRegI64(rN),
14770 mkU8(index)));
14771 if (isT)
14772 putIRegT(rT, e, condT);
14773 else
14774 putIRegA(rT, e, condT, Ijk_Boring);
14775 DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
14776 rT, rN, index);
14777 goto decode_success_vfp;
14779 else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
14780 index = (opc >> 1) & 3;
14781 IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
14782 binop(Iop_GetElem16x4,
14783 getDRegI64(rN),
14784 mkU8(index)));
14785 if (isT)
14786 putIRegT(rT, e, condT);
14787 else
14788 putIRegA(rT, e, condT, Ijk_Boring);
14789 DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
14790 rT, rN, index);
14791 goto decode_success_vfp;
14793 else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
14794 index = (opc >> 2) & 1;
14795 IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
14796 if (isT)
14797 putIRegT(rT, e, condT);
14798 else
14799 putIRegA(rT, e, condT, Ijk_Boring);
14800 DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
14801 goto decode_success_vfp;
14802 } else {
14803 /* fall through */
14808 // VMOV.F32 sD, #imm
14809 // FCONSTS sD, #imm
14810 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14811 && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
14812 UInt rD = (INSN(15,12) << 1) | INSN(22,22);
14813 UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
14814 UInt b = (imm8 >> 6) & 1;
14815 UInt imm;
14816 imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
14817 | ((imm8 & 0x1f) << 3);
14818 imm <<= 16;
14819 putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
14820 DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
14821 goto decode_success_vfp;
14824 // VMOV.F64 dD, #imm
14825 // FCONSTD dD, #imm
14826 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14827 && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
14828 UInt rD = INSN(15,12) | (INSN(22,22) << 4);
14829 UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
14830 UInt b = (imm8 >> 6) & 1;
14831 ULong imm;
14832 imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
14833 | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
14834 imm <<= 48;
14835 putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
14836 DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
14837 goto decode_success_vfp;
14840 /* ---------------------- vdup ------------------------- */
14841 // VDUP dD, rT
14842 // VDUP qD, rT
14843 if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
14844 && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
14845 UInt rD = (INSN(7,7) << 4) | INSN(19,16);
14846 UInt rT = INSN(15,12);
14847 UInt Q = INSN(21,21);
14848 UInt size = (INSN(22,22) << 1) | INSN(5,5);
14849 if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
14850 /* fall through */
14851 } else {
14852 IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
14853 if (Q) {
14854 rD >>= 1;
14855 switch (size) {
14856 case 0:
14857 putQReg(rD, unop(Iop_Dup32x4, e), condT);
14858 break;
14859 case 1:
14860 putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
14861 condT);
14862 break;
14863 case 2:
14864 putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
14865 condT);
14866 break;
14867 default:
14868 vassert(0);
14870 DIP("vdup.%d q%u, r%u\n", 32 / (1<<size), rD, rT);
14871 } else {
14872 switch (size) {
14873 case 0:
14874 putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
14875 break;
14876 case 1:
14877 putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
14878 condT);
14879 break;
14880 case 2:
14881 putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
14882 condT);
14883 break;
14884 default:
14885 vassert(0);
14887 DIP("vdup.%d d%u, r%u\n", 32 / (1<<size), rD, rT);
14889 goto decode_success_vfp;
14893 /* --------------------- f{ld,st}d --------------------- */
14894 // FLDD, FSTD
14895 if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
14896 && BITS4(1,0,1,1) == INSN(11,8)) {
14897 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
14898 UInt rN = INSN(19,16);
14899 UInt offset = (insn28 & 0xFF) << 2;
14900 UInt bU = (insn28 >> 23) & 1; /* 1: +offset 0: -offset */
14901 UInt bL = (insn28 >> 20) & 1; /* 1: load 0: store */
14902 /* make unconditional */
14903 if (condT != IRTemp_INVALID) {
14904 if (isT)
14905 mk_skip_over_T32_if_cond_is_false( condT );
14906 else
14907 mk_skip_over_A32_if_cond_is_false( condT );
14908 condT = IRTemp_INVALID;
14910 IRTemp ea = newTemp(Ity_I32);
14911 assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
14912 align4if(isT ? getIRegT(rN) : getIRegA(rN),
14913 rN == 15),
14914 mkU32(offset)));
14915 if (bL) {
14916 putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
14917 } else {
14918 storeLE(mkexpr(ea), getDReg(dD));
14920 DIP("f%sd%s d%u, [r%u, %c#%u]\n",
14921 bL ? "ld" : "st", nCC(conq), dD, rN,
14922 bU ? '+' : '-', offset);
14923 goto decode_success_vfp;
14926 /* --------------------- dp insns (D) --------------------- */
14927 if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
14928 && BITS4(1,0,1,1) == INSN(11,8)
14929 && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
14930 UInt dM = INSN(3,0) | (INSN(5,5) << 4); /* argR */
14931 UInt dD = INSN(15,12) | (INSN(22,22) << 4); /* dst/acc */
14932 UInt dN = INSN(19,16) | (INSN(7,7) << 4); /* argL */
14933 UInt bP = (insn28 >> 23) & 1;
14934 UInt bQ = (insn28 >> 21) & 1;
14935 UInt bR = (insn28 >> 20) & 1;
14936 UInt bS = (insn28 >> 6) & 1;
14937 UInt opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
14938 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
14939 switch (opc) {
14940 case BITS4(0,0,0,0): /* MAC: d + n * m */
14941 putDReg(dD, triop(Iop_AddF64, rm,
14942 getDReg(dD),
14943 triop(Iop_MulF64, rm, getDReg(dN),
14944 getDReg(dM))),
14945 condT);
14946 DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14947 goto decode_success_vfp;
14948 case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
14949 putDReg(dD, triop(Iop_AddF64, rm,
14950 getDReg(dD),
14951 unop(Iop_NegF64,
14952 triop(Iop_MulF64, rm, getDReg(dN),
14953 getDReg(dM)))),
14954 condT);
14955 DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14956 goto decode_success_vfp;
14957 case BITS4(0,0,1,0): /* MSC: - d + n * m */
14958 putDReg(dD, triop(Iop_AddF64, rm,
14959 unop(Iop_NegF64, getDReg(dD)),
14960 triop(Iop_MulF64, rm, getDReg(dN),
14961 getDReg(dM))),
14962 condT);
14963 DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14964 goto decode_success_vfp;
14965 case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
14966 putDReg(dD, triop(Iop_AddF64, rm,
14967 unop(Iop_NegF64, getDReg(dD)),
14968 unop(Iop_NegF64,
14969 triop(Iop_MulF64, rm, getDReg(dN),
14970 getDReg(dM)))),
14971 condT);
14972 DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14973 goto decode_success_vfp;
14974 case BITS4(0,1,0,0): /* MUL: n * m */
14975 putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
14976 condT);
14977 DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14978 goto decode_success_vfp;
14979 case BITS4(0,1,0,1): /* NMUL: - n * m */
14980 putDReg(dD, unop(Iop_NegF64,
14981 triop(Iop_MulF64, rm, getDReg(dN),
14982 getDReg(dM))),
14983 condT);
14984 DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14985 goto decode_success_vfp;
14986 case BITS4(0,1,1,0): /* ADD: n + m */
14987 putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
14988 condT);
14989 DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14990 goto decode_success_vfp;
14991 case BITS4(0,1,1,1): /* SUB: n - m */
14992 putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
14993 condT);
14994 DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14995 goto decode_success_vfp;
14996 case BITS4(1,0,0,0): /* DIV: n / m */
14997 putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
14998 condT);
14999 DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15000 goto decode_success_vfp;
15001 case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
15002 /* XXXROUNDINGFIXME look up ARM reference for fused
15003 multiply-add rounding */
15004 putDReg(dD, triop(Iop_AddF64, rm,
15005 unop(Iop_NegF64, getDReg(dD)),
15006 triop(Iop_MulF64, rm,
15007 getDReg(dN),
15008 getDReg(dM))),
15009 condT);
15010 DIP("vfnmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15011 goto decode_success_vfp;
15012 case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
15013 /* XXXROUNDINGFIXME look up ARM reference for fused
15014 multiply-add rounding */
15015 putDReg(dD, triop(Iop_AddF64, rm,
15016 unop(Iop_NegF64, getDReg(dD)),
15017 triop(Iop_MulF64, rm,
15018 unop(Iop_NegF64, getDReg(dN)),
15019 getDReg(dM))),
15020 condT);
15021 DIP("vfnmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15022 goto decode_success_vfp;
15023 case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
15024 /* XXXROUNDINGFIXME look up ARM reference for fused
15025 multiply-add rounding */
15026 putDReg(dD, triop(Iop_AddF64, rm,
15027 getDReg(dD),
15028 triop(Iop_MulF64, rm, getDReg(dN),
15029 getDReg(dM))),
15030 condT);
15031 DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15032 goto decode_success_vfp;
15033 case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
15034 /* XXXROUNDINGFIXME look up ARM reference for fused
15035 multiply-add rounding */
15036 putDReg(dD, triop(Iop_AddF64, rm,
15037 getDReg(dD),
15038 triop(Iop_MulF64, rm,
15039 unop(Iop_NegF64, getDReg(dN)),
15040 getDReg(dM))),
15041 condT);
15042 DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15043 goto decode_success_vfp;
15044 default:
15045 break;
15049 /* --------------------- compares (D) --------------------- */
15050 /* 31 27 23 19 15 11 7 3
15051 28 24 20 16 12 8 4 0
15052 FCMPD cond 1110 1D11 0100 Dd 1011 0100 Dm
15053 FCMPED cond 1110 1D11 0100 Dd 1011 1100 Dm
15054 FCMPZD cond 1110 1D11 0101 Dd 1011 0100 0000
15055 FCMPZED cond 1110 1D11 0101 Dd 1011 1100 0000
15058 Z=0 Compare Dd vs Dm and set FPSCR 31:28 accordingly
15059 Z=1 Compare Dd vs zero
15061 N=1 generates Invalid Operation exn if either arg is any kind of NaN
15062 N=0 generates Invalid Operation exn if either arg is a signalling NaN
15063 (Not that we pay any attention to N here)
15065 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15066 && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15067 && BITS4(1,0,1,1) == INSN(11,8)
15068 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15069 UInt bZ = (insn28 >> 16) & 1;
15070 UInt bN = (insn28 >> 7) & 1;
15071 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15072 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15073 if (bZ && INSN(3,0) != 0) {
15074 /* does not decode; fall through */
15075 } else {
15076 IRTemp argL = newTemp(Ity_F64);
15077 IRTemp argR = newTemp(Ity_F64);
15078 IRTemp irRes = newTemp(Ity_I32);
15079 assign(argL, getDReg(dD));
15080 assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
15081 assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
15083 IRTemp nzcv = IRTemp_INVALID;
15084 IRTemp oldFPSCR = newTemp(Ity_I32);
15085 IRTemp newFPSCR = newTemp(Ity_I32);
15087 /* This is where the fun starts. We have to convert 'irRes'
15088 from an IR-convention return result (IRCmpF64Result) to an
15089 ARM-encoded (N,Z,C,V) group. The final result is in the
15090 bottom 4 bits of 'nzcv'. */
15091 /* Map compare result from IR to ARM(nzcv) */
15093 FP cmp result | IR | ARM(nzcv)
15094 --------------------------------
15095 UN 0x45 0011
15096 LT 0x01 1000
15097 GT 0x00 0010
15098 EQ 0x40 0110
15100 nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15102 /* And update FPSCR accordingly */
15103 assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
15104 assign(newFPSCR,
15105 binop(Iop_Or32,
15106 binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
15107 binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
15109 putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
15111 if (bZ) {
15112 DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
15113 } else {
15114 DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
15116 goto decode_success_vfp;
15118 /* fall through */
15121 /* --------------------- unary (D) --------------------- */
15122 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15123 && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15124 && BITS4(1,0,1,1) == INSN(11,8)
15125 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15126 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15127 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15128 UInt b16 = (insn28 >> 16) & 1;
15129 UInt b7 = (insn28 >> 7) & 1;
15130 /**/ if (b16 == 0 && b7 == 0) {
15131 // FCPYD
15132 putDReg(dD, getDReg(dM), condT);
15133 DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
15134 goto decode_success_vfp;
15136 else if (b16 == 0 && b7 == 1) {
15137 // FABSD
15138 putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
15139 DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
15140 goto decode_success_vfp;
15142 else if (b16 == 1 && b7 == 0) {
15143 // FNEGD
15144 putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
15145 DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
15146 goto decode_success_vfp;
15148 else if (b16 == 1 && b7 == 1) {
15149 // FSQRTD
15150 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15151 putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
15152 DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
15153 goto decode_success_vfp;
15155 else
15156 vassert(0);
15158 /* fall through */
15161 /* ----------------- I <-> D conversions ----------------- */
15163 // F{S,U}ITOD dD, fM
15164 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15165 && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
15166 && BITS4(1,0,1,1) == INSN(11,8)
15167 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15168 UInt bM = (insn28 >> 5) & 1;
15169 UInt fM = (INSN(3,0) << 1) | bM;
15170 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15171 UInt syned = (insn28 >> 7) & 1;
15172 if (syned) {
15173 // FSITOD
15174 putDReg(dD, unop(Iop_I32StoF64,
15175 unop(Iop_ReinterpF32asI32, getFReg(fM))),
15176 condT);
15177 DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
15178 } else {
15179 // FUITOD
15180 putDReg(dD, unop(Iop_I32UtoF64,
15181 unop(Iop_ReinterpF32asI32, getFReg(fM))),
15182 condT);
15183 DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
15185 goto decode_success_vfp;
15188 // FTO{S,U}ID fD, dM
15189 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15190 && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15191 && BITS4(1,0,1,1) == INSN(11,8)
15192 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15193 UInt bD = (insn28 >> 22) & 1;
15194 UInt fD = (INSN(15,12) << 1) | bD;
15195 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15196 UInt bZ = (insn28 >> 7) & 1;
15197 UInt syned = (insn28 >> 16) & 1;
15198 IRTemp rmode = newTemp(Ity_I32);
15199 assign(rmode, bZ ? mkU32(Irrm_ZERO)
15200 : mkexpr(mk_get_IR_rounding_mode()));
15201 if (syned) {
15202 // FTOSID
15203 putFReg(fD, unop(Iop_ReinterpI32asF32,
15204 binop(Iop_F64toI32S, mkexpr(rmode),
15205 getDReg(dM))),
15206 condT);
15207 DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
15208 nCC(conq), fD, dM);
15209 } else {
15210 // FTOUID
15211 putFReg(fD, unop(Iop_ReinterpI32asF32,
15212 binop(Iop_F64toI32U, mkexpr(rmode),
15213 getDReg(dM))),
15214 condT);
15215 DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
15216 nCC(conq), fD, dM);
15218 goto decode_success_vfp;
15221 /* ----------------------------------------------------------- */
15222 /* -- VFP instructions -- single precision -- */
15223 /* ----------------------------------------------------------- */
15225 /* --------------------- fldms, fstms --------------------- */
15227 31 27 23 19 15 11 7 0
15228 P UDWL
15229 C4-98, C5-26 1 FSTMD cond 1100 1x00 Rn Fd 1010 offset
15230 C4-98, C5-28 2 FSTMDIA cond 1100 1x10 Rn Fd 1010 offset
15231 C4-98, C5-30 3 FSTMDDB cond 1101 0x10 Rn Fd 1010 offset
15233 C4-40, C5-26 1 FLDMD cond 1100 1x01 Rn Fd 1010 offset
15234 C4-40, C5-26 2 FLDMIAD cond 1100 1x11 Rn Fd 1010 offset
15235 C4-40, C5-26 3 FLDMDBD cond 1101 0x11 Rn Fd 1010 offset
15237 Regs transferred: F(Fd:D) .. F(Fd:d + offset)
15238 offset must not imply a reg > 15
15239 IA/DB: Rn is changed by (4 x # regs transferred)
15241 case coding:
15242 1 at-Rn (access at Rn)
15243 2 ia-Rn (access at Rn, then Rn += 4n)
15244 3 db-Rn (Rn -= 4n, then access at Rn)
15246 if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
15247 && INSN(11,8) == BITS4(1,0,1,0)) {
15248 UInt bP = (insn28 >> 24) & 1;
15249 UInt bU = (insn28 >> 23) & 1;
15250 UInt bW = (insn28 >> 21) & 1;
15251 UInt bL = (insn28 >> 20) & 1;
15252 UInt bD = (insn28 >> 22) & 1;
15253 UInt offset = (insn28 >> 0) & 0xFF;
15254 UInt rN = INSN(19,16);
15255 UInt fD = (INSN(15,12) << 1) | bD;
15256 UInt nRegs = offset;
15257 UInt summary = 0;
15258 Int i;
15260 /**/ if (bP == 0 && bU == 1 && bW == 0) {
15261 summary = 1;
15263 else if (bP == 0 && bU == 1 && bW == 1) {
15264 summary = 2;
15266 else if (bP == 1 && bU == 0 && bW == 1) {
15267 summary = 3;
15269 else goto after_vfp_fldms_fstms;
15271 /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
15272 if (rN == 15 && (summary == 2 || summary == 3 || isT))
15273 goto after_vfp_fldms_fstms;
15275 /* offset must specify at least one register */
15276 if (offset < 1)
15277 goto after_vfp_fldms_fstms;
15279 /* can't transfer regs after S31 */
15280 if (fD + nRegs - 1 >= 32)
15281 goto after_vfp_fldms_fstms;
15283 /* Now, we can't do a conditional load or store, since that very
15284 likely will generate an exception. So we have to take a side
15285 exit at this point if the condition is false. */
15286 if (condT != IRTemp_INVALID) {
15287 if (isT)
15288 mk_skip_over_T32_if_cond_is_false( condT );
15289 else
15290 mk_skip_over_A32_if_cond_is_false( condT );
15291 condT = IRTemp_INVALID;
15293 /* Ok, now we're unconditional. Do the load or store. */
15295 /* get the old Rn value */
15296 IRTemp rnT = newTemp(Ity_I32);
15297 assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
15298 rN == 15));
15300 /* make a new value for Rn, post-insn */
15301 IRTemp rnTnew = IRTemp_INVALID;
15302 if (summary == 2 || summary == 3) {
15303 rnTnew = newTemp(Ity_I32);
15304 assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
15305 mkexpr(rnT),
15306 mkU32(4 * nRegs)));
15309 /* decide on the base transfer address */
15310 IRTemp taT = newTemp(Ity_I32);
15311 assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
15313 /* update Rn if necessary -- in case 3, we're moving it down, so
15314 update before any memory reference, in order to keep Memcheck
15315 and V's stack-extending logic (on linux) happy */
15316 if (summary == 3) {
15317 if (isT)
15318 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
15319 else
15320 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
15323 /* generate the transfers */
15324 for (i = 0; i < nRegs; i++) {
15325 IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
15326 if (bL) {
15327 putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
15328 } else {
15329 storeLE(addr, getFReg(fD + i));
15333 /* update Rn if necessary -- in case 2, we're moving it up, so
15334 update after any memory reference, in order to keep Memcheck
15335 and V's stack-extending logic (on linux) happy */
15336 if (summary == 2) {
15337 if (isT)
15338 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
15339 else
15340 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
15343 const HChar* nm = bL==1 ? "ld" : "st";
15344 switch (summary) {
15345 case 1: DIP("f%sms%s r%u, {s%u-s%u}\n",
15346 nm, nCC(conq), rN, fD, fD + nRegs - 1);
15347 break;
15348 case 2: DIP("f%smias%s r%u!, {s%u-s%u}\n",
15349 nm, nCC(conq), rN, fD, fD + nRegs - 1);
15350 break;
15351 case 3: DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
15352 nm, nCC(conq), rN, fD, fD + nRegs - 1);
15353 break;
15354 default: vassert(0);
15357 goto decode_success_vfp;
15358 /* FIXME alignment constraints? */
15361 after_vfp_fldms_fstms:
15363 /* --------------------- fmsr, fmrs --------------------- */
15364 if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
15365 && BITS4(1,0,1,0) == INSN(11,8)
15366 && BITS4(0,0,0,0) == INSN(3,0)
15367 && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
15368 UInt rD = INSN(15,12);
15369 UInt b7 = (insn28 >> 7) & 1;
15370 UInt fN = (INSN(19,16) << 1) | b7;
15371 UInt b20 = (insn28 >> 20) & 1;
15372 if (rD == 15) {
15373 /* fall through */
15374 /* Let's assume that no sane person would want to do
15375 floating-point transfers to or from the program counter,
15376 and simply decline to decode the instruction. The ARM ARM
15377 doesn't seem to explicitly disallow this case, though. */
15378 } else {
15379 if (b20) {
15380 IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
15381 if (isT)
15382 putIRegT(rD, res, condT);
15383 else
15384 putIRegA(rD, res, condT, Ijk_Boring);
15385 DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
15386 } else {
15387 putFReg(fN, unop(Iop_ReinterpI32asF32,
15388 isT ? getIRegT(rD) : getIRegA(rD)),
15389 condT);
15390 DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
15392 goto decode_success_vfp;
15394 /* fall through */
15397 /* --------------------- f{ld,st}s --------------------- */
15398 // FLDS, FSTS
15399 if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
15400 && BITS4(1,0,1,0) == INSN(11,8)) {
15401 UInt bD = (insn28 >> 22) & 1;
15402 UInt fD = (INSN(15,12) << 1) | bD;
15403 UInt rN = INSN(19,16);
15404 UInt offset = (insn28 & 0xFF) << 2;
15405 UInt bU = (insn28 >> 23) & 1; /* 1: +offset 0: -offset */
15406 UInt bL = (insn28 >> 20) & 1; /* 1: load 0: store */
15407 /* make unconditional */
15408 if (condT != IRTemp_INVALID) {
15409 if (isT)
15410 mk_skip_over_T32_if_cond_is_false( condT );
15411 else
15412 mk_skip_over_A32_if_cond_is_false( condT );
15413 condT = IRTemp_INVALID;
15415 IRTemp ea = newTemp(Ity_I32);
15416 assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
15417 align4if(isT ? getIRegT(rN) : getIRegA(rN),
15418 rN == 15),
15419 mkU32(offset)));
15420 if (bL) {
15421 putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
15422 } else {
15423 storeLE(mkexpr(ea), getFReg(fD));
15425 DIP("f%ss%s s%u, [r%u, %c#%u]\n",
15426 bL ? "ld" : "st", nCC(conq), fD, rN,
15427 bU ? '+' : '-', offset);
15428 goto decode_success_vfp;
15431 /* --------------------- dp insns (F) --------------------- */
15432 if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
15433 && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
15434 && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
15435 UInt bM = (insn28 >> 5) & 1;
15436 UInt bD = (insn28 >> 22) & 1;
15437 UInt bN = (insn28 >> 7) & 1;
15438 UInt fM = (INSN(3,0) << 1) | bM; /* argR */
15439 UInt fD = (INSN(15,12) << 1) | bD; /* dst/acc */
15440 UInt fN = (INSN(19,16) << 1) | bN; /* argL */
15441 UInt bP = (insn28 >> 23) & 1;
15442 UInt bQ = (insn28 >> 21) & 1;
15443 UInt bR = (insn28 >> 20) & 1;
15444 UInt bS = (insn28 >> 6) & 1;
15445 UInt opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
15446 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15447 switch (opc) {
15448 case BITS4(0,0,0,0): /* MAC: d + n * m */
15449 putFReg(fD, triop(Iop_AddF32, rm,
15450 getFReg(fD),
15451 triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
15452 condT);
15453 DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15454 goto decode_success_vfp;
15455 case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
15456 putFReg(fD, triop(Iop_AddF32, rm,
15457 getFReg(fD),
15458 unop(Iop_NegF32,
15459 triop(Iop_MulF32, rm, getFReg(fN),
15460 getFReg(fM)))),
15461 condT);
15462 DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15463 goto decode_success_vfp;
15464 case BITS4(0,0,1,0): /* MSC: - d + n * m */
15465 putFReg(fD, triop(Iop_AddF32, rm,
15466 unop(Iop_NegF32, getFReg(fD)),
15467 triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
15468 condT);
15469 DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15470 goto decode_success_vfp;
15471 case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
15472 putFReg(fD, triop(Iop_AddF32, rm,
15473 unop(Iop_NegF32, getFReg(fD)),
15474 unop(Iop_NegF32,
15475 triop(Iop_MulF32, rm,
15476 getFReg(fN),
15477 getFReg(fM)))),
15478 condT);
15479 DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15480 goto decode_success_vfp;
15481 case BITS4(0,1,0,0): /* MUL: n * m */
15482 putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
15483 condT);
15484 DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15485 goto decode_success_vfp;
15486 case BITS4(0,1,0,1): /* NMUL: - n * m */
15487 putFReg(fD, unop(Iop_NegF32,
15488 triop(Iop_MulF32, rm, getFReg(fN),
15489 getFReg(fM))),
15490 condT);
15491 DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15492 goto decode_success_vfp;
15493 case BITS4(0,1,1,0): /* ADD: n + m */
15494 putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
15495 condT);
15496 DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15497 goto decode_success_vfp;
15498 case BITS4(0,1,1,1): /* SUB: n - m */
15499 putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
15500 condT);
15501 DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15502 goto decode_success_vfp;
15503 case BITS4(1,0,0,0): /* DIV: n / m */
15504 putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
15505 condT);
15506 DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15507 goto decode_success_vfp;
15508 case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
15509 /* XXXROUNDINGFIXME look up ARM reference for fused
15510 multiply-add rounding */
15511 putFReg(fD, triop(Iop_AddF32, rm,
15512 unop(Iop_NegF32, getFReg(fD)),
15513 triop(Iop_MulF32, rm,
15514 getFReg(fN),
15515 getFReg(fM))),
15516 condT);
15517 DIP("vfnmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15518 goto decode_success_vfp;
15519 case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
15520 /* XXXROUNDINGFIXME look up ARM reference for fused
15521 multiply-add rounding */
15522 putFReg(fD, triop(Iop_AddF32, rm,
15523 unop(Iop_NegF32, getFReg(fD)),
15524 triop(Iop_MulF32, rm,
15525 unop(Iop_NegF32, getFReg(fN)),
15526 getFReg(fM))),
15527 condT);
15528 DIP("vfnmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15529 goto decode_success_vfp;
15530 case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
15531 /* XXXROUNDINGFIXME look up ARM reference for fused
15532 multiply-add rounding */
15533 putFReg(fD, triop(Iop_AddF32, rm,
15534 getFReg(fD),
15535 triop(Iop_MulF32, rm, getFReg(fN),
15536 getFReg(fM))),
15537 condT);
15538 DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15539 goto decode_success_vfp;
15540 case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
15541 /* XXXROUNDINGFIXME look up ARM reference for fused
15542 multiply-add rounding */
15543 putFReg(fD, triop(Iop_AddF32, rm,
15544 getFReg(fD),
15545 triop(Iop_MulF32, rm,
15546 unop(Iop_NegF32, getFReg(fN)),
15547 getFReg(fM))),
15548 condT);
15549 DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15550 goto decode_success_vfp;
15551 default:
15552 break;
15556 /* --------------------- compares (S) --------------------- */
15557 /* 31 27 23 19 15 11 7 3
15558 28 24 20 16 12 8 4 0
15559 FCMPS cond 1110 1D11 0100 Fd 1010 01M0 Fm
15560 FCMPES cond 1110 1D11 0100 Fd 1010 11M0 Fm
15561 FCMPZS cond 1110 1D11 0101 Fd 1010 0100 0000
15562 FCMPZED cond 1110 1D11 0101 Fd 1010 1100 0000
15565 Z=0 Compare Fd:D vs Fm:M and set FPSCR 31:28 accordingly
15566 Z=1 Compare Fd:D vs zero
15568 N=1 generates Invalid Operation exn if either arg is any kind of NaN
15569 N=0 generates Invalid Operation exn if either arg is a signalling NaN
15570 (Not that we pay any attention to N here)
15572 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15573 && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15574 && BITS4(1,0,1,0) == INSN(11,8)
15575 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15576 UInt bZ = (insn28 >> 16) & 1;
15577 UInt bN = (insn28 >> 7) & 1;
15578 UInt bD = (insn28 >> 22) & 1;
15579 UInt bM = (insn28 >> 5) & 1;
15580 UInt fD = (INSN(15,12) << 1) | bD;
15581 UInt fM = (INSN(3,0) << 1) | bM;
15582 if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
15583 /* does not decode; fall through */
15584 } else {
15585 IRTemp argL = newTemp(Ity_F64);
15586 IRTemp argR = newTemp(Ity_F64);
15587 IRTemp irRes = newTemp(Ity_I32);
15589 assign(argL, unop(Iop_F32toF64, getFReg(fD)));
15590 assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
15591 : unop(Iop_F32toF64, getFReg(fM)));
15592 assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
15594 IRTemp nzcv = IRTemp_INVALID;
15595 IRTemp oldFPSCR = newTemp(Ity_I32);
15596 IRTemp newFPSCR = newTemp(Ity_I32);
15598 /* This is where the fun starts. We have to convert 'irRes'
15599 from an IR-convention return result (IRCmpF64Result) to an
15600 ARM-encoded (N,Z,C,V) group. The final result is in the
15601 bottom 4 bits of 'nzcv'. */
15602 /* Map compare result from IR to ARM(nzcv) */
15604 FP cmp result | IR | ARM(nzcv)
15605 --------------------------------
15606 UN 0x45 0011
15607 LT 0x01 1000
15608 GT 0x00 0010
15609 EQ 0x40 0110
15611 nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15613 /* And update FPSCR accordingly */
15614 assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
15615 assign(newFPSCR,
15616 binop(Iop_Or32,
15617 binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
15618 binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
15620 putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
15622 if (bZ) {
15623 DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
15624 } else {
15625 DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
15626 nCC(conq), fD, fM);
15628 goto decode_success_vfp;
15630 /* fall through */
15633 /* --------------------- unary (S) --------------------- */
15634 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15635 && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15636 && BITS4(1,0,1,0) == INSN(11,8)
15637 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15638 UInt bD = (insn28 >> 22) & 1;
15639 UInt bM = (insn28 >> 5) & 1;
15640 UInt fD = (INSN(15,12) << 1) | bD;
15641 UInt fM = (INSN(3,0) << 1) | bM;
15642 UInt b16 = (insn28 >> 16) & 1;
15643 UInt b7 = (insn28 >> 7) & 1;
15644 /**/ if (b16 == 0 && b7 == 0) {
15645 // FCPYS
15646 putFReg(fD, getFReg(fM), condT);
15647 DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
15648 goto decode_success_vfp;
15650 else if (b16 == 0 && b7 == 1) {
15651 // FABSS
15652 putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
15653 DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
15654 goto decode_success_vfp;
15656 else if (b16 == 1 && b7 == 0) {
15657 // FNEGS
15658 putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
15659 DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
15660 goto decode_success_vfp;
15662 else if (b16 == 1 && b7 == 1) {
15663 // FSQRTS
15664 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15665 putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
15666 DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
15667 goto decode_success_vfp;
15669 else
15670 vassert(0);
15672 /* fall through */
15675 /* ----------------- I <-> S conversions ----------------- */
15677 // F{S,U}ITOS fD, fM
15678 /* These are more complex than FSITOD/FUITOD. In the D cases, a 32
15679 bit int will always fit within the 53 bit mantissa, so there's
15680 no possibility of a loss of precision, but that's obviously not
15681 the case here. Hence this case possibly requires rounding, and
15682 so it drags in the current rounding mode. */
15683 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15684 && BITS4(1,0,0,0) == INSN(19,16)
15685 && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
15686 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15687 UInt bM = (insn28 >> 5) & 1;
15688 UInt bD = (insn28 >> 22) & 1;
15689 UInt fM = (INSN(3,0) << 1) | bM;
15690 UInt fD = (INSN(15,12) << 1) | bD;
15691 UInt syned = (insn28 >> 7) & 1;
15692 IRTemp rmode = newTemp(Ity_I32);
15693 assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
15694 if (syned) {
15695 // FSITOS
15696 putFReg(fD, binop(Iop_F64toF32,
15697 mkexpr(rmode),
15698 unop(Iop_I32StoF64,
15699 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
15700 condT);
15701 DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
15702 } else {
15703 // FUITOS
15704 putFReg(fD, binop(Iop_F64toF32,
15705 mkexpr(rmode),
15706 unop(Iop_I32UtoF64,
15707 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
15708 condT);
15709 DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
15711 goto decode_success_vfp;
15714 // FTO{S,U}IS fD, fM
15715 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15716 && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15717 && BITS4(1,0,1,0) == INSN(11,8)
15718 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15719 UInt bM = (insn28 >> 5) & 1;
15720 UInt bD = (insn28 >> 22) & 1;
15721 UInt fD = (INSN(15,12) << 1) | bD;
15722 UInt fM = (INSN(3,0) << 1) | bM;
15723 UInt bZ = (insn28 >> 7) & 1;
15724 UInt syned = (insn28 >> 16) & 1;
15725 IRTemp rmode = newTemp(Ity_I32);
15726 assign(rmode, bZ ? mkU32(Irrm_ZERO)
15727 : mkexpr(mk_get_IR_rounding_mode()));
15728 if (syned) {
15729 // FTOSIS
15730 putFReg(fD, unop(Iop_ReinterpI32asF32,
15731 binop(Iop_F64toI32S, mkexpr(rmode),
15732 unop(Iop_F32toF64, getFReg(fM)))),
15733 condT);
15734 DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
15735 nCC(conq), fD, fM);
15736 goto decode_success_vfp;
15737 } else {
15738 // FTOUIS
15739 putFReg(fD, unop(Iop_ReinterpI32asF32,
15740 binop(Iop_F64toI32U, mkexpr(rmode),
15741 unop(Iop_F32toF64, getFReg(fM)))),
15742 condT);
15743 DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
15744 nCC(conq), fD, fM);
15745 goto decode_success_vfp;
15749 /* ----------------- S <-> D conversions ----------------- */
15751 // FCVTDS
15752 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15753 && BITS4(0,1,1,1) == INSN(19,16)
15754 && BITS4(1,0,1,0) == INSN(11,8)
15755 && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
15756 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15757 UInt bM = (insn28 >> 5) & 1;
15758 UInt fM = (INSN(3,0) << 1) | bM;
15759 putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
15760 DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
15761 goto decode_success_vfp;
15764 // FCVTSD
15765 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15766 && BITS4(0,1,1,1) == INSN(19,16)
15767 && BITS4(1,0,1,1) == INSN(11,8)
15768 && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
15769 UInt bD = (insn28 >> 22) & 1;
15770 UInt fD = (INSN(15,12) << 1) | bD;
15771 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15772 IRTemp rmode = newTemp(Ity_I32);
15773 assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
15774 putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
15775 condT);
15776 DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
15777 goto decode_success_vfp;
15780 /* --------------- VCVT fixed<->floating, VFP --------------- */
15781 /* 31 27 23 19 15 11 7 3
15782 28 24 20 16 12 8 4 0
15784 cond 1110 1D11 1p1U Vd 101f x1i0 imm4
15786 VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
15787 VCVT<c>.<Td>.F32 <Dd>, <Dd>, #fbits
15788 VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
15789 VCVT<c>.F32.<Td> <Dd>, <Dd>, #fbits
15790 are of this form. We only handle a subset of the cases though.
15792 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15793 && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
15794 && BITS3(1,0,1) == INSN(11,9)
15795 && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
15796 UInt bD = INSN(22,22);
15797 UInt bOP = INSN(18,18);
15798 UInt bU = INSN(16,16);
15799 UInt Vd = INSN(15,12);
15800 UInt bSF = INSN(8,8);
15801 UInt bSX = INSN(7,7);
15802 UInt bI = INSN(5,5);
15803 UInt imm4 = INSN(3,0);
15804 Bool to_fixed = bOP == 1;
15805 Bool dp_op = bSF == 1;
15806 Bool unsyned = bU == 1;
15807 UInt size = bSX == 0 ? 16 : 32;
15808 Int frac_bits = size - ((imm4 << 1) | bI);
15809 UInt d = dp_op ? ((bD << 4) | Vd) : ((Vd << 1) | bD);
15811 IRExpr* rm = mkU32(Irrm_NEAREST);
15812 IRTemp scale = newTemp(Ity_F64);
15813 assign(scale, unop(Iop_I32UtoF64, mkU32( ((UInt)1) << (frac_bits-1) )));
15815 if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
15816 && size == 32) {
15817 /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
15818 /* This generates really horrible code. We could potentially
15819 do much better. */
15820 IRTemp rmode = newTemp(Ity_I32);
15821 assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
15822 IRTemp src32 = newTemp(Ity_I32);
15823 assign(src32, unop(Iop_ReinterpF32asI32, getFReg(d)));
15824 IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
15825 mkexpr(src32 ) );
15826 IRExpr* resF64 = triop(Iop_DivF64,
15827 rm, as_F64,
15828 triop(Iop_AddF64, rm, mkexpr(scale),
15829 mkexpr(scale)));
15830 IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
15831 putFReg(d, resF32, condT);
15832 DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
15833 unsyned ? 'u' : 's', d, d, frac_bits);
15834 goto decode_success_vfp;
15836 if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
15837 && size == 32) {
15838 /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
15839 /* This generates really horrible code. We could potentially
15840 do much better. */
15841 IRTemp src32 = newTemp(Ity_I32);
15842 assign(src32, unop(Iop_64to32, getDRegI64(d)));
15843 IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
15844 mkexpr(src32 ) );
15845 IRExpr* resF64 = triop(Iop_DivF64,
15846 rm, as_F64,
15847 triop(Iop_AddF64, rm, mkexpr(scale),
15848 mkexpr(scale)));
15849 putDReg(d, resF64, condT);
15850 DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
15851 unsyned ? 'u' : 's', d, d, frac_bits);
15852 goto decode_success_vfp;
15854 if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
15855 && size == 32) {
15856 /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
15857 IRTemp srcF64 = newTemp(Ity_F64);
15858 assign(srcF64, getDReg(d));
15859 IRTemp scaledF64 = newTemp(Ity_F64);
15860 assign(scaledF64, triop(Iop_MulF64,
15861 rm, mkexpr(srcF64),
15862 triop(Iop_AddF64, rm, mkexpr(scale),
15863 mkexpr(scale))));
15864 IRTemp rmode = newTemp(Ity_I32);
15865 assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
15866 IRTemp asI32 = newTemp(Ity_I32);
15867 assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
15868 mkexpr(rmode), mkexpr(scaledF64)));
15869 putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
15870 mkexpr(asI32)), condT);
15872 DIP("vcvt.%c32.f64, d%u, d%u, #%d\n",
15873 unsyned ? 'u' : 's', d, d, frac_bits);
15874 goto decode_success_vfp;
15876 if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && !dp_op
15877 && size == 32) {
15878 /* VCVT.{S,U}32.F32 S[d], S[d], #frac_bits */
15879 IRTemp srcF32 = newTemp(Ity_F32);
15880 assign(srcF32, getFReg(d));
15881 IRTemp scaledF64 = newTemp(Ity_F64);
15882 assign(scaledF64, triop(Iop_MulF64,
15883 rm, unop(Iop_F32toF64, mkexpr(srcF32)),
15884 triop(Iop_AddF64, rm, mkexpr(scale),
15885 mkexpr(scale))));
15886 IRTemp rmode = newTemp(Ity_I32);
15887 assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
15888 IRTemp asI32 = newTemp(Ity_I32);
15889 assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
15890 mkexpr(rmode), mkexpr(scaledF64)));
15891 putFReg(d, unop(Iop_ReinterpI32asF32, mkexpr(asI32)), condT);
15892 DIP("vcvt.%c32.f32, d%u, d%u, #%d\n",
15893 unsyned ? 'u' : 's', d, d, frac_bits);
15894 goto decode_success_vfp;
15896 /* fall through */
15899 /* FAILURE */
15900 return False;
15902 decode_success_vfp:
15903 /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
15904 assert that we aren't accepting, in this fn, insns that actually
15905 should be handled somewhere else. */
15906 vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
15907 return True;
15909 # undef INSN
15913 /*------------------------------------------------------------*/
15914 /*--- Instructions in NV (never) space ---*/
15915 /*------------------------------------------------------------*/
15917 /* ARM only */
15918 /* Translate a NV space instruction. If successful, returns True and
15919 *dres may or may not be updated. If failure, returns False and
15920 doesn't change *dres nor create any IR.
15922 Note that all NEON instructions (in ARM mode) up to and including
15923 ARMv7, but not later, are handled through here, since they are all
15924 in NV space.
15926 static Bool decode_NV_instruction_ARMv7_and_below
15927 ( /*MOD*/DisResult* dres,
15928 const VexArchInfo* archinfo,
15929 UInt insn )
15931 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15932 # define INSN_COND SLICE_UInt(insn, 31, 28)
15934 HChar dis_buf[128];
15936 // Should only be called for NV instructions
15937 vassert(BITS4(1,1,1,1) == INSN_COND);
15939 /* ------------------------ pld{w} ------------------------ */
15940 if (BITS8(0,1,0,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
15941 && BITS4(1,1,1,1) == INSN(15,12)) {
15942 UInt rN = INSN(19,16);
15943 UInt imm12 = INSN(11,0);
15944 UInt bU = INSN(23,23);
15945 UInt bR = INSN(22,22);
15946 DIP("pld%c [r%u, #%c%u]\n", bR ? ' ' : 'w', rN, bU ? '+' : '-', imm12);
15947 return True;
15950 if (BITS8(0,1,1,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
15951 && BITS4(1,1,1,1) == INSN(15,12)
15952 && 0 == INSN(4,4)) {
15953 UInt rN = INSN(19,16);
15954 UInt rM = INSN(3,0);
15955 UInt imm5 = INSN(11,7);
15956 UInt sh2 = INSN(6,5);
15957 UInt bU = INSN(23,23);
15958 UInt bR = INSN(22,22);
15959 if (rM != 15 && (rN != 15 || bR)) {
15960 IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
15961 sh2, imm5, dis_buf);
15962 IRTemp eaT = newTemp(Ity_I32);
15963 /* Bind eaE to a temp merely for debugging-vex purposes, so we
15964 can check it's a plausible decoding. It will get removed
15965 by iropt a little later on. */
15966 vassert(eaE);
15967 assign(eaT, eaE);
15968 DIP("pld%c %s\n", bR ? ' ' : 'w', dis_buf);
15969 return True;
15971 /* fall through */
15974 /* ------------------------ pli ------------------------ */
15975 if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
15976 && BITS4(1,1,1,1) == INSN(15,12)) {
15977 UInt rN = INSN(19,16);
15978 UInt imm12 = INSN(11,0);
15979 UInt bU = INSN(23,23);
15980 DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
15981 return True;
15984 /* --------------------- Interworking branches --------------------- */
15986 // BLX (1), viz, unconditional branch and link to R15+simm24
15987 // and set CPSR.T = 1, that is, switch to Thumb mode
15988 if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
15989 UInt bitH = INSN(24,24);
15990 UInt uimm24 = INSN(23,0); uimm24 <<= 8;
15991 Int simm24 = (Int)uimm24; simm24 >>= 8;
15992 simm24 = (((UInt)simm24) << 2) + (bitH << 1);
15993 /* Now this is a bit tricky. Since we're decoding an ARM insn,
15994 it is implies that CPSR.T == 0. Hence the current insn's
15995 address is guaranteed to be of the form X--(30)--X00. So, no
15996 need to mask any bits off it. But need to set the lowest bit
15997 to 1 to denote we're in Thumb mode after this, since
15998 guest_R15T has CPSR.T as the lowest bit. And we can't chase
15999 into the call, so end the block at this point. */
16000 UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
16001 putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
16002 IRTemp_INVALID/*because AL*/, Ijk_Boring );
16003 llPutIReg(15, mkU32(dst));
16004 dres->jk_StopHere = Ijk_Call;
16005 dres->whatNext = Dis_StopHere;
16006 DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
16007 return True;
16010 /* ------------------- v7 barrier insns ------------------- */
16011 switch (insn) {
16012 case 0xF57FF06F: /* ISB */
16013 stmt( IRStmt_MBE(Imbe_Fence) );
16014 DIP("ISB\n");
16015 return True;
16016 case 0xF57FF04F: /* DSB sy */
16017 case 0xF57FF04E: /* DSB st */
16018 case 0xF57FF04B: /* DSB ish */
16019 case 0xF57FF04A: /* DSB ishst */
16020 case 0xF57FF047: /* DSB nsh */
16021 case 0xF57FF046: /* DSB nshst */
16022 case 0xF57FF043: /* DSB osh */
16023 case 0xF57FF042: /* DSB oshst */
16024 stmt( IRStmt_MBE(Imbe_Fence) );
16025 DIP("DSB\n");
16026 return True;
16027 case 0xF57FF05F: /* DMB sy */
16028 case 0xF57FF05E: /* DMB st */
16029 case 0xF57FF05B: /* DMB ish */
16030 case 0xF57FF05A: /* DMB ishst */
16031 case 0xF57FF057: /* DMB nsh */
16032 case 0xF57FF056: /* DMB nshst */
16033 case 0xF57FF053: /* DMB osh */
16034 case 0xF57FF052: /* DMB oshst */
16035 stmt( IRStmt_MBE(Imbe_Fence) );
16036 DIP("DMB\n");
16037 return True;
16038 default:
16039 break;
16042 /* ------------------- CLREX ------------------ */
16043 if (insn == 0xF57FF01F) {
16044 /* AFAICS, this simply cancels a (all?) reservations made by a
16045 (any?) preceding LDREX(es). Arrange to hand it through to
16046 the back end. */
16047 stmt( IRStmt_MBE(Imbe_CancelReservation) );
16048 DIP("clrex\n");
16049 return True;
16052 /* ------------------- NEON ------------------- */
16053 if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
16054 Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
16055 dres, insn, IRTemp_INVALID/*unconditional*/,
16056 False/*!isT*/
16058 if (ok_neon)
16059 return True;
16062 // unrecognised
16063 return False;
16065 # undef INSN_COND
16066 # undef INSN
16070 /*------------------------------------------------------------*/
16071 /*--- Disassemble a single ARM instruction ---*/
16072 /*------------------------------------------------------------*/
16074 /* Disassemble a single ARM instruction into IR. The instruction is
16075 located in host memory at guest_instr, and has (decoded) guest IP
16076 of guest_R15_curr_instr_notENC, which will have been set before the
16077 call here. */
16079 static
16080 DisResult disInstr_ARM_WRK (
16081 const UChar* guest_instr,
16082 const VexArchInfo* archinfo,
16083 const VexAbiInfo* abiinfo,
16084 Bool sigill_diag
16087 // A macro to fish bits out of 'insn'.
16088 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
16089 # define INSN_COND SLICE_UInt(insn, 31, 28)
16091 DisResult dres;
16092 UInt insn;
16093 IRTemp condT; /* :: Ity_I32 */
16094 UInt summary;
16095 HChar dis_buf[128]; // big enough to hold LDMIA etc text
16097 /* Set result defaults. */
16098 dres.whatNext = Dis_Continue;
16099 dres.len = 4;
16100 dres.jk_StopHere = Ijk_INVALID;
16101 dres.hint = Dis_HintNone;
16103 /* Set default actions for post-insn handling of writes to r15, if
16104 required. */
16105 r15written = False;
16106 r15guard = IRTemp_INVALID; /* unconditional */
16107 r15kind = Ijk_Boring;
16109 /* At least this is simple on ARM: insns are all 4 bytes long, and
16110 4-aligned. So just fish the whole thing out of memory right now
16111 and have done. */
16112 insn = getUIntLittleEndianly( guest_instr );
16114 if (0) vex_printf("insn: 0x%x\n", insn);
16116 DIP("\t(arm) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
16118 vassert(0 == (guest_R15_curr_instr_notENC & 3));
16120 /* ----------------------------------------------------------- */
16122 /* Spot "Special" instructions (see comment at top of file). */
16124 const UChar* code = guest_instr;
16125 /* Spot the 16-byte preamble:
16127 e1a0c1ec mov r12, r12, ROR #3
16128 e1a0c6ec mov r12, r12, ROR #13
16129 e1a0ceec mov r12, r12, ROR #29
16130 e1a0c9ec mov r12, r12, ROR #19
16132 UInt word1 = 0xE1A0C1EC;
16133 UInt word2 = 0xE1A0C6EC;
16134 UInt word3 = 0xE1A0CEEC;
16135 UInt word4 = 0xE1A0C9EC;
16136 if (getUIntLittleEndianly(code+ 0) == word1 &&
16137 getUIntLittleEndianly(code+ 4) == word2 &&
16138 getUIntLittleEndianly(code+ 8) == word3 &&
16139 getUIntLittleEndianly(code+12) == word4) {
16140 /* Got a "Special" instruction preamble. Which one is it? */
16141 if (getUIntLittleEndianly(code+16) == 0xE18AA00A
16142 /* orr r10,r10,r10 */) {
16143 /* R3 = client_request ( R4 ) */
16144 DIP("r3 = client_request ( %%r4 )\n");
16145 llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
16146 dres.jk_StopHere = Ijk_ClientReq;
16147 dres.whatNext = Dis_StopHere;
16148 goto decode_success;
16150 else
16151 if (getUIntLittleEndianly(code+16) == 0xE18BB00B
16152 /* orr r11,r11,r11 */) {
16153 /* R3 = guest_NRADDR */
16154 DIP("r3 = guest_NRADDR\n");
16155 dres.len = 20;
16156 llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
16157 goto decode_success;
16159 else
16160 if (getUIntLittleEndianly(code+16) == 0xE18CC00C
16161 /* orr r12,r12,r12 */) {
16162 /* branch-and-link-to-noredir R4 */
16163 DIP("branch-and-link-to-noredir r4\n");
16164 llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
16165 llPutIReg(15, llGetIReg(4));
16166 dres.jk_StopHere = Ijk_NoRedir;
16167 dres.whatNext = Dis_StopHere;
16168 goto decode_success;
16170 else
16171 if (getUIntLittleEndianly(code+16) == 0xE1899009
16172 /* orr r9,r9,r9 */) {
16173 /* IR injection */
16174 DIP("IR injection\n");
16175 vex_inject_ir(irsb, Iend_LE);
16176 // Invalidate the current insn. The reason is that the IRop we're
16177 // injecting here can change. In which case the translation has to
16178 // be redone. For ease of handling, we simply invalidate all the
16179 // time.
16180 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
16181 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(20)));
16182 llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
16183 dres.whatNext = Dis_StopHere;
16184 dres.jk_StopHere = Ijk_InvalICache;
16185 goto decode_success;
16187 /* We don't know what it is. Set opc1/opc2 so decode_failure
16188 can print the insn following the Special-insn preamble. */
16189 insn = getUIntLittleEndianly(code+16);
16190 goto decode_failure;
16191 /*NOTREACHED*/
16196 /* ----------------------------------------------------------- */
16198 /* Main ARM instruction decoder starts here. */
16200 /* Deal with the condition. Strategy is to merely generate a
16201 condition temporary at this point (or IRTemp_INVALID, meaning
16202 unconditional). We leave it to lower-level instruction decoders
16203 to decide whether they can generate straight-line code, or
16204 whether they must generate a side exit before the instruction.
16205 condT :: Ity_I32 and is always either zero or one. */
16206 condT = IRTemp_INVALID;
16207 switch ( (ARMCondcode)INSN_COND ) {
16208 case ARMCondNV: {
16209 // Illegal instruction prior to v5 (see ARM ARM A3-5), but
16210 // some cases are acceptable
16211 Bool ok
16212 = decode_NV_instruction_ARMv7_and_below(&dres, archinfo, insn);
16213 if (ok)
16214 goto decode_success;
16215 else
16216 goto after_v7_decoder;
16218 case ARMCondAL: // Always executed
16219 break;
16220 case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
16221 case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
16222 case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
16223 case ARMCondGT: case ARMCondLE:
16224 condT = newTemp(Ity_I32);
16225 assign( condT, mk_armg_calculate_condition( INSN_COND ));
16226 break;
16229 /* ----------------------------------------------------------- */
16230 /* -- ARMv5 integer instructions -- */
16231 /* ----------------------------------------------------------- */
16233 /* ---------------- Data processing ops ------------------- */
16235 if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
16236 && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
16237 IRTemp shop = IRTemp_INVALID; /* shifter operand */
16238 IRTemp shco = IRTemp_INVALID; /* shifter carry out */
16239 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16240 UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16241 UInt bitS = (insn >> 20) & 1; /* 20:20 */
16242 IRTemp rNt = IRTemp_INVALID;
16243 IRTemp res = IRTemp_INVALID;
16244 IRTemp oldV = IRTemp_INVALID;
16245 IRTemp oldC = IRTemp_INVALID;
16246 const HChar* name = NULL;
16247 IROp op = Iop_INVALID;
16248 Bool ok;
16250 switch (INSN(24,21)) {
16252 /* --------- ADD, SUB, AND, OR --------- */
16253 case BITS4(0,1,0,0): /* ADD: Rd = Rn + shifter_operand */
16254 name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
16255 case BITS4(0,0,1,0): /* SUB: Rd = Rn - shifter_operand */
16256 name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
16257 case BITS4(0,0,1,1): /* RSB: Rd = shifter_operand - Rn */
16258 name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
16259 case BITS4(0,0,0,0): /* AND: Rd = Rn & shifter_operand */
16260 name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
16261 case BITS4(1,1,0,0): /* OR: Rd = Rn | shifter_operand */
16262 name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
16263 case BITS4(0,0,0,1): /* EOR: Rd = Rn ^ shifter_operand */
16264 name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
16265 case BITS4(1,1,1,0): /* BIC: Rd = Rn & ~shifter_operand */
16266 name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
16267 rd_eq_rn_op_SO: {
16268 Bool isRSB = False;
16269 Bool isBIC = False;
16270 switch (INSN(24,21)) {
16271 case BITS4(0,0,1,1):
16272 vassert(op == Iop_Sub32); isRSB = True; break;
16273 case BITS4(1,1,1,0):
16274 vassert(op == Iop_And32); isBIC = True; break;
16275 default:
16276 break;
16278 rNt = newTemp(Ity_I32);
16279 assign(rNt, getIRegA(rN));
16280 ok = mk_shifter_operand(
16281 INSN(25,25), INSN(11,0),
16282 &shop, bitS ? &shco : NULL, dis_buf
16284 if (!ok)
16285 break;
16286 res = newTemp(Ity_I32);
16287 // compute the main result
16288 if (isRSB) {
16289 // reverse-subtract: shifter_operand - Rn
16290 vassert(op == Iop_Sub32);
16291 assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
16292 } else if (isBIC) {
16293 // andn: shifter_operand & ~Rn
16294 vassert(op == Iop_And32);
16295 assign(res, binop(op, mkexpr(rNt),
16296 unop(Iop_Not32, mkexpr(shop))) );
16297 } else {
16298 // normal: Rn op shifter_operand
16299 assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
16301 // but don't commit it until after we've finished
16302 // all necessary reads from the guest state
16303 if (bitS
16304 && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
16305 oldV = newTemp(Ity_I32);
16306 assign( oldV, mk_armg_calculate_flag_v() );
16308 // can't safely read guest state after here
16309 // now safe to put the main result
16310 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
16311 // XXXX!! not safe to read any guest state after
16312 // this point (I think the code below doesn't do that).
16313 if (!bitS)
16314 vassert(shco == IRTemp_INVALID);
16315 /* Update the flags thunk if necessary */
16316 if (bitS) {
16317 vassert(shco != IRTemp_INVALID);
16318 switch (op) {
16319 case Iop_Add32:
16320 setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
16321 break;
16322 case Iop_Sub32:
16323 if (isRSB) {
16324 setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
16325 } else {
16326 setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
16328 break;
16329 case Iop_And32: /* BIC and AND set the flags the same */
16330 case Iop_Or32:
16331 case Iop_Xor32:
16332 // oldV has been read just above
16333 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16334 res, shco, oldV, condT );
16335 break;
16336 default:
16337 vassert(0);
16340 DIP("%s%s%s r%u, r%u, %s\n",
16341 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
16342 goto decode_success;
16345 /* --------- MOV, MVN --------- */
16346 case BITS4(1,1,0,1): /* MOV: Rd = shifter_operand */
16347 case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
16348 Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
16349 IRTemp jk = Ijk_Boring;
16350 if (rN != 0)
16351 break; /* rN must be zero */
16352 ok = mk_shifter_operand(
16353 INSN(25,25), INSN(11,0),
16354 &shop, bitS ? &shco : NULL, dis_buf
16356 if (!ok)
16357 break;
16358 res = newTemp(Ity_I32);
16359 assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
16360 : mkexpr(shop) );
16361 if (bitS) {
16362 vassert(shco != IRTemp_INVALID);
16363 oldV = newTemp(Ity_I32);
16364 assign( oldV, mk_armg_calculate_flag_v() );
16365 } else {
16366 vassert(shco == IRTemp_INVALID);
16368 /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
16369 return for purposes of branch prediction. */
16370 if (!isMVN && INSN(11,0) == 14) {
16371 jk = Ijk_Ret;
16373 // can't safely read guest state after here
16374 putIRegA( rD, mkexpr(res), condT, jk );
16375 /* Update the flags thunk if necessary */
16376 if (bitS) {
16377 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16378 res, shco, oldV, condT );
16380 DIP("%s%s%s r%u, %s\n",
16381 isMVN ? "mvn" : "mov",
16382 nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
16383 goto decode_success;
16386 /* --------- CMP --------- */
16387 case BITS4(1,0,1,0): /* CMP: (void) Rn - shifter_operand */
16388 case BITS4(1,0,1,1): { /* CMN: (void) Rn + shifter_operand */
16389 Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
16390 if (rD != 0)
16391 break; /* rD must be zero */
16392 if (bitS == 0)
16393 break; /* if S (bit 20) is not set, it's not CMP/CMN */
16394 rNt = newTemp(Ity_I32);
16395 assign(rNt, getIRegA(rN));
16396 ok = mk_shifter_operand(
16397 INSN(25,25), INSN(11,0),
16398 &shop, NULL, dis_buf
16400 if (!ok)
16401 break;
16402 // can't safely read guest state after here
16403 /* Update the flags thunk. */
16404 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
16405 rNt, shop, condT );
16406 DIP("%s%s r%u, %s\n",
16407 isCMN ? "cmn" : "cmp",
16408 nCC(INSN_COND), rN, dis_buf );
16409 goto decode_success;
16412 /* --------- TST --------- */
16413 case BITS4(1,0,0,0): /* TST: (void) Rn & shifter_operand */
16414 case BITS4(1,0,0,1): { /* TEQ: (void) Rn ^ shifter_operand */
16415 Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
16416 if (rD != 0)
16417 break; /* rD must be zero */
16418 if (bitS == 0)
16419 break; /* if S (bit 20) is not set, it's not TST/TEQ */
16420 rNt = newTemp(Ity_I32);
16421 assign(rNt, getIRegA(rN));
16422 ok = mk_shifter_operand(
16423 INSN(25,25), INSN(11,0),
16424 &shop, &shco, dis_buf
16426 if (!ok)
16427 break;
16428 /* Update the flags thunk. */
16429 res = newTemp(Ity_I32);
16430 assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
16431 mkexpr(rNt), mkexpr(shop)) );
16432 oldV = newTemp(Ity_I32);
16433 assign( oldV, mk_armg_calculate_flag_v() );
16434 // can't safely read guest state after here
16435 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16436 res, shco, oldV, condT );
16437 DIP("%s%s r%u, %s\n",
16438 isTEQ ? "teq" : "tst",
16439 nCC(INSN_COND), rN, dis_buf );
16440 goto decode_success;
16443 /* --------- ADC, SBC, RSC --------- */
16444 case BITS4(0,1,0,1): /* ADC: Rd = Rn + shifter_operand + oldC */
16445 name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
16446 case BITS4(0,1,1,0): /* SBC: Rd = Rn - shifter_operand - (oldC ^ 1) */
16447 name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
16448 case BITS4(0,1,1,1): /* RSC: Rd = shifter_operand - Rn - (oldC ^ 1) */
16449 name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
16450 rd_eq_rn_op_SO_op_oldC: {
16451 // FIXME: shco isn't used for anything. Get rid of it.
16452 rNt = newTemp(Ity_I32);
16453 assign(rNt, getIRegA(rN));
16454 ok = mk_shifter_operand(
16455 INSN(25,25), INSN(11,0),
16456 &shop, bitS ? &shco : NULL, dis_buf
16458 if (!ok)
16459 break;
16460 oldC = newTemp(Ity_I32);
16461 assign( oldC, mk_armg_calculate_flag_c() );
16462 res = newTemp(Ity_I32);
16463 // compute the main result
16464 switch (INSN(24,21)) {
16465 case BITS4(0,1,0,1): /* ADC */
16466 assign(res,
16467 binop(Iop_Add32,
16468 binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
16469 mkexpr(oldC) ));
16470 break;
16471 case BITS4(0,1,1,0): /* SBC */
16472 assign(res,
16473 binop(Iop_Sub32,
16474 binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
16475 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16476 break;
16477 case BITS4(0,1,1,1): /* RSC */
16478 assign(res,
16479 binop(Iop_Sub32,
16480 binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
16481 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16482 break;
16483 default:
16484 vassert(0);
16486 // but don't commit it until after we've finished
16487 // all necessary reads from the guest state
16488 // now safe to put the main result
16489 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
16490 // XXXX!! not safe to read any guest state after
16491 // this point (I think the code below doesn't do that).
16492 if (!bitS)
16493 vassert(shco == IRTemp_INVALID);
16494 /* Update the flags thunk if necessary */
16495 if (bitS) {
16496 vassert(shco != IRTemp_INVALID);
16497 switch (INSN(24,21)) {
16498 case BITS4(0,1,0,1): /* ADC */
16499 setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
16500 rNt, shop, oldC, condT );
16501 break;
16502 case BITS4(0,1,1,0): /* SBC */
16503 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16504 rNt, shop, oldC, condT );
16505 break;
16506 case BITS4(0,1,1,1): /* RSC */
16507 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16508 shop, rNt, oldC, condT );
16509 break;
16510 default:
16511 vassert(0);
16514 DIP("%s%s%s r%u, r%u, %s\n",
16515 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
16516 goto decode_success;
16519 default:
16520 vassert(0);
16522 } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
16524 /* --------------------- Load/store (ubyte & word) -------- */
16525 // LDR STR LDRB STRB
16526 /* 31 27 23 19 15 11 6 4 3 # highest bit
16527 28 24 20 16 12
16528 A5-20 1 | 16 cond 0101 UB0L Rn Rd imm12
16529 A5-22 1 | 32 cond 0111 UB0L Rn Rd imm5 sh2 0 Rm
16530 A5-24 2 | 16 cond 0101 UB1L Rn Rd imm12
16531 A5-26 2 | 32 cond 0111 UB1L Rn Rd imm5 sh2 0 Rm
16532 A5-28 3 | 16 cond 0100 UB0L Rn Rd imm12
16533 A5-32 3 | 32 cond 0110 UB0L Rn Rd imm5 sh2 0 Rm
16535 /* case coding:
16536 1 at-ea (access at ea)
16537 2 at-ea-then-upd (access at ea, then Rn = ea)
16538 3 at-Rn-then-upd (access at Rn, then Rn = ea)
16539 ea coding
16540 16 Rn +/- imm12
16541 32 Rn +/- Rm sh2 imm5
16543 /* Quickly skip over all of this for hopefully most instructions */
16544 if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
16545 goto after_load_store_ubyte_or_word;
16547 summary = 0;
16549 /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
16550 summary = 1 | 16;
16552 else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
16553 && INSN(4,4) == 0) {
16554 summary = 1 | 32;
16556 else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
16557 summary = 2 | 16;
16559 else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
16560 && INSN(4,4) == 0) {
16561 summary = 2 | 32;
16563 else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
16564 summary = 3 | 16;
16566 else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
16567 && INSN(4,4) == 0) {
16568 summary = 3 | 32;
16570 else goto after_load_store_ubyte_or_word;
16572 { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16573 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16574 UInt rM = (insn >> 0) & 0xF; /* 3:0 */
16575 UInt bU = (insn >> 23) & 1; /* 23 */
16576 UInt bB = (insn >> 22) & 1; /* 22 */
16577 UInt bL = (insn >> 20) & 1; /* 20 */
16578 UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
16579 UInt imm5 = (insn >> 7) & 0x1F; /* 11:7 */
16580 UInt sh2 = (insn >> 5) & 3; /* 6:5 */
16582 /* Skip some invalid cases, which would lead to two competing
16583 updates to the same register, or which are otherwise
16584 disallowed by the spec. */
16585 switch (summary) {
16586 case 1 | 16:
16587 break;
16588 case 1 | 32:
16589 if (rM == 15) goto after_load_store_ubyte_or_word;
16590 break;
16591 case 2 | 16: case 3 | 16:
16592 if (rN == 15) goto after_load_store_ubyte_or_word;
16593 if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
16594 break;
16595 case 2 | 32: case 3 | 32:
16596 if (rM == 15) goto after_load_store_ubyte_or_word;
16597 if (rN == 15) goto after_load_store_ubyte_or_word;
16598 if (rN == rM) goto after_load_store_ubyte_or_word;
16599 if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
16600 break;
16601 default:
16602 vassert(0);
16605 /* compute the effective address. Bind it to a tmp since we
16606 may need to use it twice. */
16607 IRExpr* eaE = NULL;
16608 switch (summary & 0xF0) {
16609 case 16:
16610 eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
16611 break;
16612 case 32:
16613 eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
16614 dis_buf );
16615 break;
16617 vassert(eaE);
16618 IRTemp eaT = newTemp(Ity_I32);
16619 assign(eaT, eaE);
16621 /* get the old Rn value */
16622 IRTemp rnT = newTemp(Ity_I32);
16623 assign(rnT, getIRegA(rN));
16625 /* decide on the transfer address */
16626 IRTemp taT = IRTemp_INVALID;
16627 switch (summary & 0x0F) {
16628 case 1: case 2: taT = eaT; break;
16629 case 3: taT = rnT; break;
16631 vassert(taT != IRTemp_INVALID);
16633 if (bL == 0) {
16634 /* Store. If necessary, update the base register before the
16635 store itself, so that the common idiom of "str rX, [sp,
16636 #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
16637 rX") doesn't cause Memcheck to complain that the access is
16638 below the stack pointer. Also, not updating sp before the
16639 store confuses Valgrind's dynamic stack-extending logic. So
16640 do it before the store. Hence we need to snarf the store
16641 data before doing the basereg update. */
16643 /* get hold of the data to be stored */
16644 IRTemp rDt = newTemp(Ity_I32);
16645 assign(rDt, getIRegA(rD));
16647 /* Update Rn if necessary. */
16648 switch (summary & 0x0F) {
16649 case 2: case 3:
16650 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16651 break;
16654 /* generate the transfer */
16655 if (bB == 0) { // word store
16656 storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
16657 } else { // byte store
16658 vassert(bB == 1);
16659 storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
16662 } else {
16663 /* Load */
16664 vassert(bL == 1);
16666 /* generate the transfer */
16667 if (bB == 0) { // word load
16668 IRTemp jk = Ijk_Boring;
16669 /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
16670 base register and PC as the destination register is a return for
16671 purposes of branch prediction.
16672 The ARM ARM Sec. C9.10.1 further specifies that it must use a
16673 post-increment by immediate addressing mode to be counted in
16674 event 0x0E (Procedure return).*/
16675 if (rN == 13 && summary == (3 | 16) && bB == 0) {
16676 jk = Ijk_Ret;
16678 IRTemp tD = newTemp(Ity_I32);
16679 loadGuardedLE( tD, ILGop_Ident32,
16680 mkexpr(taT), llGetIReg(rD), condT );
16681 /* "rD == 15 ? condT : IRTemp_INVALID": simply
16682 IRTemp_INVALID would be correct in all cases here, and
16683 for the non-r15 case it generates better code, by
16684 avoiding two tests of the cond (since it is already
16685 tested by loadGuardedLE). However, the logic at the end
16686 of this function, that deals with writes to r15, has an
16687 optimisation which depends on seeing whether or not the
16688 write is conditional. Hence in this particular case we
16689 let it "see" the guard condition. */
16690 putIRegA( rD, mkexpr(tD),
16691 rD == 15 ? condT : IRTemp_INVALID, jk );
16692 } else { // byte load
16693 vassert(bB == 1);
16694 IRTemp tD = newTemp(Ity_I32);
16695 loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
16696 /* No point in similar 3rd arg complexity here, since we
16697 can't sanely write anything to r15 like this. */
16698 putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
16701 /* Update Rn if necessary. */
16702 switch (summary & 0x0F) {
16703 case 2: case 3:
16704 // should be assured by logic above:
16705 if (bL == 1)
16706 vassert(rD != rN); /* since we just wrote rD */
16707 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16708 break;
16712 switch (summary & 0x0F) {
16713 case 1: DIP("%sr%s%s r%u, %s\n",
16714 bL == 0 ? "st" : "ld",
16715 bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16716 break;
16717 case 2: DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16718 bL == 0 ? "st" : "ld",
16719 bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16720 break;
16721 case 3: DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16722 bL == 0 ? "st" : "ld",
16723 bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16724 break;
16725 default: vassert(0);
16728 /* XXX deal with alignment constraints */
16730 goto decode_success;
16732 /* Complications:
16734 For all loads: if the Amode specifies base register
16735 writeback, and the same register is specified for Rd and Rn,
16736 the results are UNPREDICTABLE.
16738 For all loads and stores: if R15 is written, branch to
16739 that address afterwards.
16741 STRB: straightforward
16742 LDRB: loaded data is zero extended
16743 STR: lowest 2 bits of address are ignored
16744 LDR: if the lowest 2 bits of the address are nonzero
16745 then the loaded value is rotated right by 8 * the lowest 2 bits
16749 after_load_store_ubyte_or_word:
16751 /* --------------------- Load/store (sbyte & hword) -------- */
16752 // LDRH LDRSH STRH LDRSB
16753 /* 31 27 23 19 15 11 7 3 # highest bit
16754 28 24 20 16 12 8 4 0
16755 A5-36 1 | 16 cond 0001 U10L Rn Rd im4h 1SH1 im4l
16756 A5-38 1 | 32 cond 0001 U00L Rn Rd 0000 1SH1 Rm
16757 A5-40 2 | 16 cond 0001 U11L Rn Rd im4h 1SH1 im4l
16758 A5-42 2 | 32 cond 0001 U01L Rn Rd 0000 1SH1 Rm
16759 A5-44 3 | 16 cond 0000 U10L Rn Rd im4h 1SH1 im4l
16760 A5-46 3 | 32 cond 0000 U00L Rn Rd 0000 1SH1 Rm
16762 /* case coding:
16763 1 at-ea (access at ea)
16764 2 at-ea-then-upd (access at ea, then Rn = ea)
16765 3 at-Rn-then-upd (access at Rn, then Rn = ea)
16766 ea coding
16767 16 Rn +/- imm8
16768 32 Rn +/- Rm
16770 /* Quickly skip over all of this for hopefully most instructions */
16771 if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
16772 goto after_load_store_sbyte_or_hword;
16774 /* Check the "1SH1" thing. */
16775 if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
16776 goto after_load_store_sbyte_or_hword;
16778 summary = 0;
16780 /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
16781 summary = 1 | 16;
16783 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
16784 summary = 1 | 32;
16786 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
16787 summary = 2 | 16;
16789 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
16790 summary = 2 | 32;
16792 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
16793 summary = 3 | 16;
16795 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
16796 summary = 3 | 32;
16798 else goto after_load_store_sbyte_or_hword;
16800 { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16801 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16802 UInt rM = (insn >> 0) & 0xF; /* 3:0 */
16803 UInt bU = (insn >> 23) & 1; /* 23 U=1 offset+, U=0 offset- */
16804 UInt bL = (insn >> 20) & 1; /* 20 L=1 load, L=0 store */
16805 UInt bH = (insn >> 5) & 1; /* H=1 halfword, H=0 byte */
16806 UInt bS = (insn >> 6) & 1; /* S=1 signed, S=0 unsigned */
16807 UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
16809 /* Skip combinations that are either meaningless or already
16810 handled by main word-or-unsigned-byte load-store
16811 instructions. */
16812 if (bS == 0 && bH == 0) /* "unsigned byte" */
16813 goto after_load_store_sbyte_or_hword;
16814 if (bS == 1 && bL == 0) /* "signed store" */
16815 goto after_load_store_sbyte_or_hword;
16817 /* Require 11:8 == 0 for Rn +/- Rm cases */
16818 if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
16819 goto after_load_store_sbyte_or_hword;
16821 /* Skip some invalid cases, which would lead to two competing
16822 updates to the same register, or which are otherwise
16823 disallowed by the spec. */
16824 switch (summary) {
16825 case 1 | 16:
16826 break;
16827 case 1 | 32:
16828 if (rM == 15) goto after_load_store_sbyte_or_hword;
16829 break;
16830 case 2 | 16: case 3 | 16:
16831 if (rN == 15) goto after_load_store_sbyte_or_hword;
16832 if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
16833 break;
16834 case 2 | 32: case 3 | 32:
16835 if (rM == 15) goto after_load_store_sbyte_or_hword;
16836 if (rN == 15) goto after_load_store_sbyte_or_hword;
16837 if (rN == rM) goto after_load_store_sbyte_or_hword;
16838 if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
16839 break;
16840 default:
16841 vassert(0);
16844 /* If this is a branch, make it unconditional at this point.
16845 Doing conditional branches in-line is too complex (for now).
16846 Note that you'd have to be insane to use any of these loads to
16847 do a branch, since they only load 16 bits at most, but we
16848 handle it just in case. */
16849 if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
16850 // go uncond
16851 mk_skip_over_A32_if_cond_is_false( condT );
16852 condT = IRTemp_INVALID;
16853 // now uncond
16856 /* compute the effective address. Bind it to a tmp since we
16857 may need to use it twice. */
16858 IRExpr* eaE = NULL;
16859 switch (summary & 0xF0) {
16860 case 16:
16861 eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
16862 break;
16863 case 32:
16864 eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
16865 break;
16867 vassert(eaE);
16868 IRTemp eaT = newTemp(Ity_I32);
16869 assign(eaT, eaE);
16871 /* get the old Rn value */
16872 IRTemp rnT = newTemp(Ity_I32);
16873 assign(rnT, getIRegA(rN));
16875 /* decide on the transfer address */
16876 IRTemp taT = IRTemp_INVALID;
16877 switch (summary & 0x0F) {
16878 case 1: case 2: taT = eaT; break;
16879 case 3: taT = rnT; break;
16881 vassert(taT != IRTemp_INVALID);
16883 /* ll previous value of rD, for dealing with conditional loads */
16884 IRTemp llOldRd = newTemp(Ity_I32);
16885 assign(llOldRd, llGetIReg(rD));
16887 /* halfword store H 1 L 0 S 0
16888 uhalf load H 1 L 1 S 0
16889 shalf load H 1 L 1 S 1
16890 sbyte load H 0 L 1 S 1
16892 const HChar* name = NULL;
16893 /* generate the transfer */
16894 /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
16895 storeGuardedLE( mkexpr(taT),
16896 unop(Iop_32to16, getIRegA(rD)), condT );
16897 name = "strh";
16899 else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
16900 IRTemp newRd = newTemp(Ity_I32);
16901 loadGuardedLE( newRd, ILGop_16Uto32,
16902 mkexpr(taT), mkexpr(llOldRd), condT );
16903 putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16904 name = "ldrh";
16906 else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
16907 IRTemp newRd = newTemp(Ity_I32);
16908 loadGuardedLE( newRd, ILGop_16Sto32,
16909 mkexpr(taT), mkexpr(llOldRd), condT );
16910 putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16911 name = "ldrsh";
16913 else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
16914 IRTemp newRd = newTemp(Ity_I32);
16915 loadGuardedLE( newRd, ILGop_8Sto32,
16916 mkexpr(taT), mkexpr(llOldRd), condT );
16917 putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16918 name = "ldrsb";
16920 else
16921 vassert(0); // should be assured by logic above
16923 /* Update Rn if necessary. */
16924 switch (summary & 0x0F) {
16925 case 2: case 3:
16926 // should be assured by logic above:
16927 if (bL == 1)
16928 vassert(rD != rN); /* since we just wrote rD */
16929 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16930 break;
16933 switch (summary & 0x0F) {
16934 case 1: DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
16935 break;
16936 case 2: DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16937 name, nCC(INSN_COND), rD, dis_buf);
16938 break;
16939 case 3: DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16940 name, nCC(INSN_COND), rD, dis_buf);
16941 break;
16942 default: vassert(0);
16945 /* XXX deal with alignment constraints */
16947 goto decode_success;
16949 /* Complications:
16951 For all loads: if the Amode specifies base register
16952 writeback, and the same register is specified for Rd and Rn,
16953 the results are UNPREDICTABLE.
16955 For all loads and stores: if R15 is written, branch to
16956 that address afterwards.
16958 Misaligned halfword stores => Unpredictable
16959 Misaligned halfword loads => Unpredictable
16963 after_load_store_sbyte_or_hword:
16965 /* --------------------- Load/store multiple -------------- */
16966 // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
16967 // Remarkably complex and difficult to get right
16968 // match 27:20 as 100XX0WL
16969 if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
16970 // A5-50 LD/STMIA cond 1000 10WL Rn RegList
16971 // A5-51 LD/STMIB cond 1001 10WL Rn RegList
16972 // A5-53 LD/STMDA cond 1000 00WL Rn RegList
16973 // A5-53 LD/STMDB cond 1001 00WL Rn RegList
16974 // 28 24 20 16 0
16976 UInt bINC = (insn >> 23) & 1;
16977 UInt bBEFORE = (insn >> 24) & 1;
16979 UInt bL = (insn >> 20) & 1; /* load=1, store=0 */
16980 UInt bW = (insn >> 21) & 1; /* Rn wback=1, no wback=0 */
16981 UInt rN = (insn >> 16) & 0xF;
16982 UInt regList = insn & 0xFFFF;
16983 /* Skip some invalid cases, which would lead to two competing
16984 updates to the same register, or which are otherwise
16985 disallowed by the spec. Note the test above has required
16986 that S == 0, since that looks like a kernel-mode only thing.
16987 Done by forcing the real pattern, viz 100XXSWL to actually be
16988 100XX0WL. */
16989 if (rN == 15) goto after_load_store_multiple;
16990 // reglist can't be empty
16991 if (regList == 0) goto after_load_store_multiple;
16992 // if requested to writeback Rn, and this is a load instruction,
16993 // then Rn can't appear in RegList, since we'd have two competing
16994 // new values for Rn. We do however accept this case for store
16995 // instructions.
16996 if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
16997 goto after_load_store_multiple;
16999 /* Now, we can't do a conditional load or store, since that very
17000 likely will generate an exception. So we have to take a side
17001 exit at this point if the condition is false. */
17002 if (condT != IRTemp_INVALID) {
17003 mk_skip_over_A32_if_cond_is_false( condT );
17004 condT = IRTemp_INVALID;
17007 /* Ok, now we're unconditional. Generate the IR. */
17008 mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
17010 DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
17011 bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
17012 nCC(INSN_COND),
17013 rN, bW ? "!" : "", regList);
17015 goto decode_success;
17018 after_load_store_multiple:
17020 /* --------------------- Control flow --------------------- */
17021 // B, BL (Branch, or Branch-and-Link, to immediate offset)
17023 if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
17024 UInt link = (insn >> 24) & 1;
17025 UInt uimm24 = insn & ((1<<24)-1); uimm24 <<= 8;
17026 Int simm24 = (Int)uimm24; simm24 >>= 8;
17027 UInt dst = guest_R15_curr_instr_notENC + 8 + (((UInt)simm24) << 2);
17028 IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
17029 if (link) {
17030 putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
17031 condT, Ijk_Boring);
17033 if (condT == IRTemp_INVALID) {
17034 /* Unconditional transfer to 'dst'. Terminate the SB at this point. */
17035 llPutIReg(15, mkU32(dst));
17036 dres.jk_StopHere = jk;
17037 dres.whatNext = Dis_StopHere;
17038 DIP("b%s 0x%x\n", link ? "l" : "", dst);
17039 } else {
17040 /* Conditional transfer to 'dst'. Terminate the SB at this point. */
17041 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
17042 jk, IRConst_U32(dst), OFFB_R15T ));
17043 llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
17044 dres.jk_StopHere = Ijk_Boring;
17045 dres.whatNext = Dis_StopHere;
17046 DIP("b%s%s 0x%x\n", link ? "l" : "", nCC(INSN_COND), dst);
17048 goto decode_success;
17051 // B, BL (Branch, or Branch-and-Link, to a register)
17052 // NB: interworking branch
17053 if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
17054 && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
17055 && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
17056 || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
17057 IRTemp dst = newTemp(Ity_I32);
17058 UInt link = (INSN(11,4) >> 1) & 1;
17059 UInt rM = INSN(3,0);
17060 // we don't decode the case (link && rM == 15), as that's
17061 // Unpredictable.
17062 if (!(link && rM == 15)) {
17063 if (condT != IRTemp_INVALID) {
17064 mk_skip_over_A32_if_cond_is_false( condT );
17066 // rM contains an interworking address exactly as we require
17067 // (with continuation CPSR.T in bit 0), so we can use it
17068 // as-is, with no masking.
17069 assign( dst, getIRegA(rM) );
17070 if (link) {
17071 putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
17072 IRTemp_INVALID/*because AL*/, Ijk_Boring );
17074 llPutIReg(15, mkexpr(dst));
17075 dres.jk_StopHere = link ? Ijk_Call
17076 : (rM == 14 ? Ijk_Ret : Ijk_Boring);
17077 dres.whatNext = Dis_StopHere;
17078 if (condT == IRTemp_INVALID) {
17079 DIP("b%sx r%u\n", link ? "l" : "", rM);
17080 } else {
17081 DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
17083 goto decode_success;
17085 /* else: (link && rM == 15): just fall through */
17088 /* --- NB: ARM interworking branches are in NV space, hence
17089 are handled elsewhere by decode_NV_instruction_ARMv7_and_below.
17093 /* --------------------- Clz --------------------- */
17094 // CLZ
17095 if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
17096 && INSN(19,16) == BITS4(1,1,1,1)
17097 && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
17098 UInt rD = INSN(15,12);
17099 UInt rM = INSN(3,0);
17100 IRTemp arg = newTemp(Ity_I32);
17101 IRTemp res = newTemp(Ity_I32);
17102 assign(arg, getIRegA(rM));
17103 assign(res, IRExpr_ITE(
17104 binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
17105 mkU32(32),
17106 unop(Iop_Clz32, mkexpr(arg))
17108 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17109 DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
17110 goto decode_success;
17113 /* --------------------- Mul etc --------------------- */
17114 // MUL
17115 if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
17116 && INSN(15,12) == BITS4(0,0,0,0)
17117 && INSN(7,4) == BITS4(1,0,0,1)) {
17118 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17119 UInt rD = INSN(19,16);
17120 UInt rS = INSN(11,8);
17121 UInt rM = INSN(3,0);
17122 if (rD == 15 || rM == 15 || rS == 15) {
17123 /* Unpredictable; don't decode; fall through */
17124 } else {
17125 IRTemp argL = newTemp(Ity_I32);
17126 IRTemp argR = newTemp(Ity_I32);
17127 IRTemp res = newTemp(Ity_I32);
17128 IRTemp oldC = IRTemp_INVALID;
17129 IRTemp oldV = IRTemp_INVALID;
17130 assign( argL, getIRegA(rM));
17131 assign( argR, getIRegA(rS));
17132 assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
17133 if (bitS) {
17134 oldC = newTemp(Ity_I32);
17135 assign(oldC, mk_armg_calculate_flag_c());
17136 oldV = newTemp(Ity_I32);
17137 assign(oldV, mk_armg_calculate_flag_v());
17139 // now update guest state
17140 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
17141 if (bitS) {
17142 IRTemp pair = newTemp(Ity_I32);
17143 assign( pair, binop(Iop_Or32,
17144 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17145 mkexpr(oldV)) );
17146 setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
17148 DIP("mul%c%s r%u, r%u, r%u\n",
17149 bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
17150 goto decode_success;
17152 /* fall through */
17155 /* --------------------- Integer Divides --------------------- */
17156 // SDIV
17157 if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
17158 && INSN(15,12) == BITS4(1,1,1,1)
17159 && INSN(7,4) == BITS4(0,0,0,1)) {
17160 UInt rD = INSN(19,16);
17161 UInt rM = INSN(11,8);
17162 UInt rN = INSN(3,0);
17163 if (rD == 15 || rM == 15 || rN == 15) {
17164 /* Unpredictable; don't decode; fall through */
17165 } else {
17166 IRTemp res = newTemp(Ity_I32);
17167 IRTemp argL = newTemp(Ity_I32);
17168 IRTemp argR = newTemp(Ity_I32);
17169 assign(argL, getIRegA(rN));
17170 assign(argR, getIRegA(rM));
17171 assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
17172 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17173 DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
17174 goto decode_success;
17178 // UDIV
17179 if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
17180 && INSN(15,12) == BITS4(1,1,1,1)
17181 && INSN(7,4) == BITS4(0,0,0,1)) {
17182 UInt rD = INSN(19,16);
17183 UInt rM = INSN(11,8);
17184 UInt rN = INSN(3,0);
17185 if (rD == 15 || rM == 15 || rN == 15) {
17186 /* Unpredictable; don't decode; fall through */
17187 } else {
17188 IRTemp res = newTemp(Ity_I32);
17189 IRTemp argL = newTemp(Ity_I32);
17190 IRTemp argR = newTemp(Ity_I32);
17191 assign(argL, getIRegA(rN));
17192 assign(argR, getIRegA(rM));
17193 assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
17194 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17195 DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
17196 goto decode_success;
17200 // MLA, MLS
17201 if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17202 && INSN(7,4) == BITS4(1,0,0,1)) {
17203 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17204 UInt isMLS = (insn >> 22) & 1; /* 22:22 */
17205 UInt rD = INSN(19,16);
17206 UInt rN = INSN(15,12);
17207 UInt rS = INSN(11,8);
17208 UInt rM = INSN(3,0);
17209 if (bitS == 1 && isMLS == 1) {
17210 /* This isn't allowed (MLS that sets flags). don't decode;
17211 fall through */
17213 else
17214 if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
17215 /* Unpredictable; don't decode; fall through */
17216 } else {
17217 IRTemp argL = newTemp(Ity_I32);
17218 IRTemp argR = newTemp(Ity_I32);
17219 IRTemp argP = newTemp(Ity_I32);
17220 IRTemp res = newTemp(Ity_I32);
17221 IRTemp oldC = IRTemp_INVALID;
17222 IRTemp oldV = IRTemp_INVALID;
17223 assign( argL, getIRegA(rM));
17224 assign( argR, getIRegA(rS));
17225 assign( argP, getIRegA(rN));
17226 assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
17227 mkexpr(argP),
17228 binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
17229 if (bitS) {
17230 vassert(!isMLS); // guaranteed above
17231 oldC = newTemp(Ity_I32);
17232 assign(oldC, mk_armg_calculate_flag_c());
17233 oldV = newTemp(Ity_I32);
17234 assign(oldV, mk_armg_calculate_flag_v());
17236 // now update guest state
17237 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
17238 if (bitS) {
17239 IRTemp pair = newTemp(Ity_I32);
17240 assign( pair, binop(Iop_Or32,
17241 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17242 mkexpr(oldV)) );
17243 setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
17245 DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
17246 isMLS ? 's' : 'a', bitS ? 's' : ' ',
17247 nCC(INSN_COND), rD, rM, rS, rN);
17248 goto decode_success;
17250 /* fall through */
17253 // SMULL, UMULL
17254 if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17255 && INSN(7,4) == BITS4(1,0,0,1)) {
17256 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17257 UInt rDhi = INSN(19,16);
17258 UInt rDlo = INSN(15,12);
17259 UInt rS = INSN(11,8);
17260 UInt rM = INSN(3,0);
17261 UInt isS = (INSN(27,20) >> 2) & 1; /* 22:22 */
17262 if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo) {
17263 /* Unpredictable; don't decode; fall through */
17264 } else {
17265 IRTemp argL = newTemp(Ity_I32);
17266 IRTemp argR = newTemp(Ity_I32);
17267 IRTemp res = newTemp(Ity_I64);
17268 IRTemp resHi = newTemp(Ity_I32);
17269 IRTemp resLo = newTemp(Ity_I32);
17270 IRTemp oldC = IRTemp_INVALID;
17271 IRTemp oldV = IRTemp_INVALID;
17272 IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17273 assign( argL, getIRegA(rM));
17274 assign( argR, getIRegA(rS));
17275 assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
17276 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17277 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17278 if (bitS) {
17279 oldC = newTemp(Ity_I32);
17280 assign(oldC, mk_armg_calculate_flag_c());
17281 oldV = newTemp(Ity_I32);
17282 assign(oldV, mk_armg_calculate_flag_v());
17284 // now update guest state
17285 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17286 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17287 if (bitS) {
17288 IRTemp pair = newTemp(Ity_I32);
17289 assign( pair, binop(Iop_Or32,
17290 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17291 mkexpr(oldV)) );
17292 setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
17294 DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
17295 isS ? 's' : 'u', bitS ? 's' : ' ',
17296 nCC(INSN_COND), rDlo, rDhi, rM, rS);
17297 goto decode_success;
17299 /* fall through */
17302 // SMLAL, UMLAL
17303 if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17304 && INSN(7,4) == BITS4(1,0,0,1)) {
17305 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17306 UInt rDhi = INSN(19,16);
17307 UInt rDlo = INSN(15,12);
17308 UInt rS = INSN(11,8);
17309 UInt rM = INSN(3,0);
17310 UInt isS = (INSN(27,20) >> 2) & 1; /* 22:22 */
17311 if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo) {
17312 /* Unpredictable; don't decode; fall through */
17313 } else {
17314 IRTemp argL = newTemp(Ity_I32);
17315 IRTemp argR = newTemp(Ity_I32);
17316 IRTemp old = newTemp(Ity_I64);
17317 IRTemp res = newTemp(Ity_I64);
17318 IRTemp resHi = newTemp(Ity_I32);
17319 IRTemp resLo = newTemp(Ity_I32);
17320 IRTemp oldC = IRTemp_INVALID;
17321 IRTemp oldV = IRTemp_INVALID;
17322 IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17323 assign( argL, getIRegA(rM));
17324 assign( argR, getIRegA(rS));
17325 assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
17326 assign( res, binop(Iop_Add64,
17327 mkexpr(old),
17328 binop(mulOp, mkexpr(argL), mkexpr(argR))) );
17329 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17330 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17331 if (bitS) {
17332 oldC = newTemp(Ity_I32);
17333 assign(oldC, mk_armg_calculate_flag_c());
17334 oldV = newTemp(Ity_I32);
17335 assign(oldV, mk_armg_calculate_flag_v());
17337 // now update guest state
17338 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17339 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17340 if (bitS) {
17341 IRTemp pair = newTemp(Ity_I32);
17342 assign( pair, binop(Iop_Or32,
17343 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17344 mkexpr(oldV)) );
17345 setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
17347 DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
17348 isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
17349 rDlo, rDhi, rM, rS);
17350 goto decode_success;
17352 /* fall through */
17355 // UMAAL
17356 if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
17357 UInt rDhi = INSN(19,16);
17358 UInt rDlo = INSN(15,12);
17359 UInt rM = INSN(11,8);
17360 UInt rN = INSN(3,0);
17361 if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo) {
17362 /* Unpredictable; don't decode; fall through */
17363 } else {
17364 IRTemp argN = newTemp(Ity_I32);
17365 IRTemp argM = newTemp(Ity_I32);
17366 IRTemp argDhi = newTemp(Ity_I32);
17367 IRTemp argDlo = newTemp(Ity_I32);
17368 IRTemp res = newTemp(Ity_I64);
17369 IRTemp resHi = newTemp(Ity_I32);
17370 IRTemp resLo = newTemp(Ity_I32);
17371 assign( argN, getIRegA(rN) );
17372 assign( argM, getIRegA(rM) );
17373 assign( argDhi, getIRegA(rDhi) );
17374 assign( argDlo, getIRegA(rDlo) );
17375 assign( res,
17376 binop(Iop_Add64,
17377 binop(Iop_Add64,
17378 binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
17379 unop(Iop_32Uto64, mkexpr(argDhi))),
17380 unop(Iop_32Uto64, mkexpr(argDlo))) );
17381 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17382 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17383 // now update guest state
17384 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17385 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17386 DIP("umaal %s r%u, r%u, r%u, r%u\n",
17387 nCC(INSN_COND), rDlo, rDhi, rN, rM);
17388 goto decode_success;
17390 /* fall through */
17393 /* --------------------- Msr etc --------------------- */
17395 // MSR apsr, #imm
17396 if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
17397 && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
17398 UInt write_ge = INSN(18,18);
17399 UInt write_nzcvq = INSN(19,19);
17400 if (write_nzcvq || write_ge) {
17401 UInt imm = (INSN(11,0) >> 0) & 0xFF;
17402 UInt rot = 2 * ((INSN(11,0) >> 8) & 0xF);
17403 IRTemp immT = newTemp(Ity_I32);
17404 vassert(rot <= 30);
17405 imm = ROR32(imm, rot);
17406 assign(immT, mkU32(imm));
17407 desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
17408 DIP("msr%s cpsr%s%sf, #0x%08x\n", nCC(INSN_COND),
17409 write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
17410 goto decode_success;
17412 /* fall through */
17415 // MSR apsr, reg
17416 if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
17417 && INSN(17,12) == BITS6(0,0,1,1,1,1)
17418 && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
17419 UInt rN = INSN(3,0);
17420 UInt write_ge = INSN(18,18);
17421 UInt write_nzcvq = INSN(19,19);
17422 if (rN != 15 && (write_nzcvq || write_ge)) {
17423 IRTemp rNt = newTemp(Ity_I32);
17424 assign(rNt, getIRegA(rN));
17425 desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
17426 DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
17427 write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
17428 goto decode_success;
17430 /* fall through */
17433 // MRS rD, cpsr
17434 if ((insn & 0x0FFF0FFF) == 0x010F0000) {
17435 UInt rD = INSN(15,12);
17436 if (rD != 15) {
17437 IRTemp apsr = synthesise_APSR();
17438 putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
17439 DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
17440 goto decode_success;
17442 /* fall through */
17445 /* --------------------- Svc --------------------- */
17446 if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
17447 UInt imm24 = (insn >> 0) & 0xFFFFFF;
17448 if (imm24 == 0) {
17449 /* A syscall. We can't do this conditionally, hence: */
17450 if (condT != IRTemp_INVALID) {
17451 mk_skip_over_A32_if_cond_is_false( condT );
17453 // AL after here
17454 llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
17455 dres.jk_StopHere = Ijk_Sys_syscall;
17456 dres.whatNext = Dis_StopHere;
17457 DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
17458 goto decode_success;
17460 /* fall through */
17463 /* ------------------------ swp ------------------------ */
17465 // SWP, SWPB
17466 if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
17467 && BITS4(0,0,0,0) == INSN(11,8)
17468 && BITS4(1,0,0,1) == INSN(7,4)) {
17469 UInt rN = INSN(19,16);
17470 UInt rD = INSN(15,12);
17471 UInt rM = INSN(3,0);
17472 IRTemp tRn = newTemp(Ity_I32);
17473 IRTemp tNew = newTemp(Ity_I32);
17474 IRTemp tOld = IRTemp_INVALID;
17475 IRTemp tSC1 = newTemp(Ity_I1);
17476 UInt isB = (insn >> 22) & 1;
17478 if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
17479 /* undecodable; fall through */
17480 } else {
17481 /* make unconditional */
17482 if (condT != IRTemp_INVALID) {
17483 mk_skip_over_A32_if_cond_is_false( condT );
17484 condT = IRTemp_INVALID;
17486 /* Ok, now we're unconditional. Generate a LL-SC loop. */
17487 assign(tRn, getIRegA(rN));
17488 assign(tNew, getIRegA(rM));
17489 if (isB) {
17490 /* swpb */
17491 tOld = newTemp(Ity_I8);
17492 stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
17493 NULL/*=>isLL*/) );
17494 stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
17495 unop(Iop_32to8, mkexpr(tNew))) );
17496 } else {
17497 /* swp */
17498 tOld = newTemp(Ity_I32);
17499 stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
17500 NULL/*=>isLL*/) );
17501 stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
17502 mkexpr(tNew)) );
17504 stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
17505 /*Ijk_NoRedir*/Ijk_Boring,
17506 IRConst_U32(guest_R15_curr_instr_notENC),
17507 OFFB_R15T ));
17508 putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
17509 IRTemp_INVALID, Ijk_Boring);
17510 DIP("swp%s%s r%u, r%u, [r%u]\n",
17511 isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
17512 goto decode_success;
17514 /* fall through */
17517 /* ----------------------------------------------------------- */
17518 /* -- ARMv6 instructions -- */
17519 /* ----------------------------------------------------------- */
17521 /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
17523 // LDREXD, LDREX, LDREXH, LDREXB
17524 if (0x01900F9F == (insn & 0x0F900FFF)) {
17525 UInt rT = INSN(15,12);
17526 UInt rN = INSN(19,16);
17527 IRType ty = Ity_INVALID;
17528 IROp widen = Iop_INVALID;
17529 const HChar* nm = NULL;
17530 Bool valid = True;
17531 switch (INSN(22,21)) {
17532 case 0: nm = ""; ty = Ity_I32; break;
17533 case 1: nm = "d"; ty = Ity_I64; break;
17534 case 2: nm = "b"; ty = Ity_I8; widen = Iop_8Uto32; break;
17535 case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
17536 default: vassert(0);
17538 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
17539 if (rT == 15 || rN == 15)
17540 valid = False;
17541 } else {
17542 vassert(ty == Ity_I64);
17543 if ((rT & 1) == 1 || rT == 14 || rN == 15)
17544 valid = False;
17546 if (valid) {
17547 IRTemp res;
17548 /* make unconditional */
17549 if (condT != IRTemp_INVALID) {
17550 mk_skip_over_A32_if_cond_is_false( condT );
17551 condT = IRTemp_INVALID;
17553 /* Ok, now we're unconditional. Do the load. */
17554 res = newTemp(ty);
17555 // FIXME: assumes little-endian guest
17556 stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
17557 NULL/*this is a load*/) );
17558 if (ty == Ity_I64) {
17559 // FIXME: assumes little-endian guest
17560 putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
17561 IRTemp_INVALID, Ijk_Boring);
17562 putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
17563 IRTemp_INVALID, Ijk_Boring);
17564 DIP("ldrex%s%s r%u, r%u, [r%u]\n",
17565 nm, nCC(INSN_COND), rT+0, rT+1, rN);
17566 } else {
17567 putIRegA(rT, widen == Iop_INVALID
17568 ? mkexpr(res) : unop(widen, mkexpr(res)),
17569 IRTemp_INVALID, Ijk_Boring);
17570 DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
17572 goto decode_success;
17574 /* undecodable; fall through */
17577 // STREXD, STREX, STREXH, STREXB
17578 if (0x01800F90 == (insn & 0x0F900FF0)) {
17579 UInt rT = INSN(3,0);
17580 UInt rN = INSN(19,16);
17581 UInt rD = INSN(15,12);
17582 IRType ty = Ity_INVALID;
17583 IROp narrow = Iop_INVALID;
17584 const HChar* nm = NULL;
17585 Bool valid = True;
17586 switch (INSN(22,21)) {
17587 case 0: nm = ""; ty = Ity_I32; break;
17588 case 1: nm = "d"; ty = Ity_I64; break;
17589 case 2: nm = "b"; ty = Ity_I8; narrow = Iop_32to8; break;
17590 case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
17591 default: vassert(0);
17593 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
17594 if (rD == 15 || rN == 15 || rT == 15
17595 || rD == rN || rD == rT)
17596 valid = False;
17597 } else {
17598 vassert(ty == Ity_I64);
17599 if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
17600 || rD == rN || rD == rT || rD == rT+1)
17601 valid = False;
17603 if (valid) {
17604 IRTemp resSC1, resSC32, data;
17605 /* make unconditional */
17606 if (condT != IRTemp_INVALID) {
17607 mk_skip_over_A32_if_cond_is_false( condT );
17608 condT = IRTemp_INVALID;
17610 /* Ok, now we're unconditional. Do the store. */
17611 data = newTemp(ty);
17612 assign(data,
17613 ty == Ity_I64
17614 // FIXME: assumes little-endian guest
17615 ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
17616 : narrow == Iop_INVALID
17617 ? getIRegA(rT)
17618 : unop(narrow, getIRegA(rT)));
17619 resSC1 = newTemp(Ity_I1);
17620 // FIXME: assumes little-endian guest
17621 stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
17623 /* Set rD to 1 on failure, 0 on success. Currently we have
17624 resSC1 == 0 on failure, 1 on success. */
17625 resSC32 = newTemp(Ity_I32);
17626 assign(resSC32,
17627 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
17629 putIRegA(rD, mkexpr(resSC32),
17630 IRTemp_INVALID, Ijk_Boring);
17631 if (ty == Ity_I64) {
17632 DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
17633 nm, nCC(INSN_COND), rD, rT, rT+1, rN);
17634 } else {
17635 DIP("strex%s%s r%u, r%u, [r%u]\n",
17636 nm, nCC(INSN_COND), rD, rT, rN);
17638 goto decode_success;
17640 /* fall through */
17643 /* --------------------- movw, movt --------------------- */
17644 if (0x03000000 == (insn & 0x0FF00000)
17645 || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
17646 UInt rD = INSN(15,12);
17647 UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
17648 UInt isT = (insn >> 22) & 1;
17649 if (rD == 15) {
17650 /* forget it */
17651 } else {
17652 if (isT) {
17653 putIRegA(rD,
17654 binop(Iop_Or32,
17655 binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
17656 mkU32(imm16 << 16)),
17657 condT, Ijk_Boring);
17658 DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
17659 goto decode_success;
17660 } else {
17661 putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
17662 DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
17663 goto decode_success;
17666 /* fall through */
17669 /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
17670 /* FIXME: this is an exact duplicate of the Thumb version. They
17671 should be commoned up. */
17672 if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
17673 && BITS4(1,1,1,1) == INSN(19,16)
17674 && BITS4(0,1,1,1) == INSN(7,4)
17675 && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
17676 UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
17677 if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
17678 Int rot = (INSN(11,8) >> 2) & 3;
17679 UInt rM = INSN(3,0);
17680 UInt rD = INSN(15,12);
17681 IRTemp srcT = newTemp(Ity_I32);
17682 IRTemp rotT = newTemp(Ity_I32);
17683 IRTemp dstT = newTemp(Ity_I32);
17684 const HChar* nm = "???";
17685 assign(srcT, getIRegA(rM));
17686 assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
17687 switch (subopc) {
17688 case BITS4(0,1,1,0): // UXTB
17689 assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
17690 nm = "uxtb";
17691 break;
17692 case BITS4(0,0,1,0): // SXTB
17693 assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
17694 nm = "sxtb";
17695 break;
17696 case BITS4(0,1,1,1): // UXTH
17697 assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
17698 nm = "uxth";
17699 break;
17700 case BITS4(0,0,1,1): // SXTH
17701 assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
17702 nm = "sxth";
17703 break;
17704 case BITS4(0,1,0,0): // UXTB16
17705 assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
17706 nm = "uxtb16";
17707 break;
17708 case BITS4(0,0,0,0): { // SXTB16
17709 IRTemp lo32 = newTemp(Ity_I32);
17710 IRTemp hi32 = newTemp(Ity_I32);
17711 assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
17712 assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
17713 assign(
17714 dstT,
17715 binop(Iop_Or32,
17716 binop(Iop_And32,
17717 unop(Iop_8Sto32,
17718 unop(Iop_32to8, mkexpr(lo32))),
17719 mkU32(0xFFFF)),
17720 binop(Iop_Shl32,
17721 unop(Iop_8Sto32,
17722 unop(Iop_32to8, mkexpr(hi32))),
17723 mkU8(16))
17725 nm = "sxtb16";
17726 break;
17728 default:
17729 vassert(0); // guarded by "if" above
17731 putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
17732 DIP("%s%s r%u, r%u, ROR #%d\n", nm, nCC(INSN_COND), rD, rM, rot);
17733 goto decode_success;
17735 /* fall through */
17738 /* ------------------- bfi, bfc ------------------- */
17739 if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
17740 && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
17741 UInt rD = INSN(15,12);
17742 UInt rN = INSN(3,0);
17743 UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
17744 UInt lsb = (insn >> 7) & 0x1F; /* 11:7 */
17745 if (rD == 15 || msb < lsb) {
17746 /* undecodable; fall through */
17747 } else {
17748 IRTemp src = newTemp(Ity_I32);
17749 IRTemp olddst = newTemp(Ity_I32);
17750 IRTemp newdst = newTemp(Ity_I32);
17751 UInt mask = ((UInt)1) << (msb - lsb);
17752 mask = (mask - 1) + mask;
17753 vassert(mask != 0); // guaranteed by "msb < lsb" check above
17754 mask <<= lsb;
17756 assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
17757 assign(olddst, getIRegA(rD));
17758 assign(newdst,
17759 binop(Iop_Or32,
17760 binop(Iop_And32,
17761 binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
17762 mkU32(mask)),
17763 binop(Iop_And32,
17764 mkexpr(olddst),
17765 mkU32(~mask)))
17768 putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
17770 if (rN == 15) {
17771 DIP("bfc%s r%u, #%u, #%u\n",
17772 nCC(INSN_COND), rD, lsb, msb-lsb+1);
17773 } else {
17774 DIP("bfi%s r%u, r%u, #%u, #%u\n",
17775 nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
17777 goto decode_success;
17779 /* fall through */
17782 /* ------------------- {u,s}bfx ------------------- */
17783 if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17784 && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
17785 UInt rD = INSN(15,12);
17786 UInt rN = INSN(3,0);
17787 UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
17788 UInt lsb = (insn >> 7) & 0x1F; /* 11:7 */
17789 UInt msb = lsb + wm1;
17790 UInt isU = (insn >> 22) & 1; /* 22:22 */
17791 if (rD == 15 || rN == 15 || msb >= 32) {
17792 /* undecodable; fall through */
17793 } else {
17794 IRTemp src = newTemp(Ity_I32);
17795 IRTemp tmp = newTemp(Ity_I32);
17796 IRTemp res = newTemp(Ity_I32);
17797 UInt mask = ((1 << wm1) - 1) + (1 << wm1);
17798 vassert(msb <= 31);
17799 vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
17801 assign(src, getIRegA(rN));
17802 assign(tmp, binop(Iop_And32,
17803 binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
17804 mkU32(mask)));
17805 assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
17806 binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
17807 mkU8(31-wm1)));
17809 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17811 DIP("%s%s r%u, r%u, #%u, #%u\n",
17812 isU ? "ubfx" : "sbfx",
17813 nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
17814 goto decode_success;
17816 /* fall through */
17819 /* --------------------- Load/store doubleword ------------- */
17820 // LDRD STRD
17821 /* 31 27 23 19 15 11 7 3 # highest bit
17822 28 24 20 16 12 8 4 0
17823 A5-36 1 | 16 cond 0001 U100 Rn Rd im4h 11S1 im4l
17824 A5-38 1 | 32 cond 0001 U000 Rn Rd 0000 11S1 Rm
17825 A5-40 2 | 16 cond 0001 U110 Rn Rd im4h 11S1 im4l
17826 A5-42 2 | 32 cond 0001 U010 Rn Rd 0000 11S1 Rm
17827 A5-44 3 | 16 cond 0000 U100 Rn Rd im4h 11S1 im4l
17828 A5-46 3 | 32 cond 0000 U000 Rn Rd 0000 11S1 Rm
17830 /* case coding:
17831 1 at-ea (access at ea)
17832 2 at-ea-then-upd (access at ea, then Rn = ea)
17833 3 at-Rn-then-upd (access at Rn, then Rn = ea)
17834 ea coding
17835 16 Rn +/- imm8
17836 32 Rn +/- Rm
17838 /* Quickly skip over all of this for hopefully most instructions */
17839 if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
17840 goto after_load_store_doubleword;
17842 /* Check the "11S1" thing. */
17843 if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
17844 goto after_load_store_doubleword;
17846 summary = 0;
17848 /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
17849 summary = 1 | 16;
17851 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
17852 summary = 1 | 32;
17854 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
17855 summary = 2 | 16;
17857 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
17858 summary = 2 | 32;
17860 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
17861 summary = 3 | 16;
17863 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
17864 summary = 3 | 32;
17866 else goto after_load_store_doubleword;
17868 { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
17869 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
17870 UInt rM = (insn >> 0) & 0xF; /* 3:0 */
17871 UInt bU = (insn >> 23) & 1; /* 23 U=1 offset+, U=0 offset- */
17872 UInt bS = (insn >> 5) & 1; /* S=1 store, S=0 load */
17873 UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
17875 /* Require rD to be an even numbered register */
17876 if ((rD & 1) != 0)
17877 goto after_load_store_doubleword;
17879 /* Require 11:8 == 0 for Rn +/- Rm cases */
17880 if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
17881 goto after_load_store_doubleword;
17883 /* Skip some invalid cases, which would lead to two competing
17884 updates to the same register, or which are otherwise
17885 disallowed by the spec. */
17886 switch (summary) {
17887 case 1 | 16:
17888 break;
17889 case 1 | 32:
17890 if (rM == 15) goto after_load_store_doubleword;
17891 break;
17892 case 2 | 16: case 3 | 16:
17893 if (rN == 15) goto after_load_store_doubleword;
17894 if (bS == 0 && (rN == rD || rN == rD+1))
17895 goto after_load_store_doubleword;
17896 break;
17897 case 2 | 32: case 3 | 32:
17898 if (rM == 15) goto after_load_store_doubleword;
17899 if (rN == 15) goto after_load_store_doubleword;
17900 if (rN == rM) goto after_load_store_doubleword;
17901 if (bS == 0 && (rN == rD || rN == rD+1))
17902 goto after_load_store_doubleword;
17903 break;
17904 default:
17905 vassert(0);
17908 /* If this is a branch, make it unconditional at this point.
17909 Doing conditional branches in-line is too complex (for
17910 now). */
17911 vassert((rD & 1) == 0); /* from tests above */
17912 if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
17913 // go uncond
17914 mk_skip_over_A32_if_cond_is_false( condT );
17915 condT = IRTemp_INVALID;
17916 // now uncond
17919 /* compute the effective address. Bind it to a tmp since we
17920 may need to use it twice. */
17921 IRExpr* eaE = NULL;
17922 switch (summary & 0xF0) {
17923 case 16:
17924 eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
17925 break;
17926 case 32:
17927 eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
17928 break;
17930 vassert(eaE);
17931 IRTemp eaT = newTemp(Ity_I32);
17932 assign(eaT, eaE);
17934 /* get the old Rn value */
17935 IRTemp rnT = newTemp(Ity_I32);
17936 assign(rnT, getIRegA(rN));
17938 /* decide on the transfer address */
17939 IRTemp taT = IRTemp_INVALID;
17940 switch (summary & 0x0F) {
17941 case 1: case 2: taT = eaT; break;
17942 case 3: taT = rnT; break;
17944 vassert(taT != IRTemp_INVALID);
17946 /* XXX deal with alignment constraints */
17947 /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
17948 ignore alignment issues for the time being. */
17950 /* For almost all cases, we do the writeback after the transfers.
17951 However, that leaves the stack "uncovered" in cases like:
17952 strd rD, [sp, #-8]
17953 strd rD, [sp, #-16]
17954 In which case, do the writeback to SP now, instead of later.
17955 This is bad in that it makes the insn non-restartable if the
17956 accesses fault, but at least keeps Memcheck happy. */
17957 Bool writeback_already_done = False;
17958 if (bS == 1 /*store*/ && summary == (2 | 16)
17959 && rN == 13 && rN != rD && rN != rD+1
17960 && bU == 0/*minus*/
17961 && (imm8 == 8 || imm8 == 16)) {
17962 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
17963 writeback_already_done = True;
17966 /* doubleword store S 1
17967 doubleword load S 0
17969 const HChar* name = NULL;
17970 /* generate the transfers */
17971 if (bS == 1) { // doubleword store
17972 storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
17973 getIRegA(rD+0), condT );
17974 storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
17975 getIRegA(rD+1), condT );
17976 name = "strd";
17977 } else { // doubleword load
17978 IRTemp oldRd0 = newTemp(Ity_I32);
17979 IRTemp oldRd1 = newTemp(Ity_I32);
17980 assign(oldRd0, llGetIReg(rD+0));
17981 assign(oldRd1, llGetIReg(rD+1));
17982 IRTemp newRd0 = newTemp(Ity_I32);
17983 IRTemp newRd1 = newTemp(Ity_I32);
17984 loadGuardedLE( newRd0, ILGop_Ident32,
17985 binop(Iop_Add32, mkexpr(taT), mkU32(0)),
17986 mkexpr(oldRd0), condT );
17987 putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
17988 loadGuardedLE( newRd1, ILGop_Ident32,
17989 binop(Iop_Add32, mkexpr(taT), mkU32(4)),
17990 mkexpr(oldRd1), condT );
17991 putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
17992 name = "ldrd";
17995 /* Update Rn if necessary. */
17996 switch (summary & 0x0F) {
17997 case 2: case 3:
17998 // should be assured by logic above:
17999 vassert(rN != 15); /* from checks above */
18000 if (bS == 0) {
18001 vassert(rD+0 != rN); /* since we just wrote rD+0 */
18002 vassert(rD+1 != rN); /* since we just wrote rD+1 */
18004 if (!writeback_already_done)
18005 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
18006 break;
18009 switch (summary & 0x0F) {
18010 case 1: DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
18011 break;
18012 case 2: DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
18013 name, nCC(INSN_COND), rD, dis_buf);
18014 break;
18015 case 3: DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
18016 name, nCC(INSN_COND), rD, dis_buf);
18017 break;
18018 default: vassert(0);
18021 goto decode_success;
18024 after_load_store_doubleword:
18026 /* ------------------- {s,u}xtab ------------- */
18027 if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
18028 && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
18029 && BITS4(0,1,1,1) == INSN(7,4)) {
18030 UInt rN = INSN(19,16);
18031 UInt rD = INSN(15,12);
18032 UInt rM = INSN(3,0);
18033 UInt rot = (insn >> 10) & 3;
18034 UInt isU = INSN(22,22);
18035 if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
18036 /* undecodable; fall through */
18037 } else {
18038 IRTemp srcL = newTemp(Ity_I32);
18039 IRTemp srcR = newTemp(Ity_I32);
18040 IRTemp res = newTemp(Ity_I32);
18041 assign(srcR, getIRegA(rM));
18042 assign(srcL, getIRegA(rN));
18043 assign(res, binop(Iop_Add32,
18044 mkexpr(srcL),
18045 unop(isU ? Iop_8Uto32 : Iop_8Sto32,
18046 unop(Iop_32to8,
18047 genROR32(srcR, 8 * rot)))));
18048 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18049 DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
18050 isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
18051 goto decode_success;
18053 /* fall through */
18056 /* ------------------- {s,u}xtah ------------- */
18057 if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
18058 && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
18059 && BITS4(0,1,1,1) == INSN(7,4)) {
18060 UInt rN = INSN(19,16);
18061 UInt rD = INSN(15,12);
18062 UInt rM = INSN(3,0);
18063 UInt rot = (insn >> 10) & 3;
18064 UInt isU = INSN(22,22);
18065 if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
18066 /* undecodable; fall through */
18067 } else {
18068 IRTemp srcL = newTemp(Ity_I32);
18069 IRTemp srcR = newTemp(Ity_I32);
18070 IRTemp res = newTemp(Ity_I32);
18071 assign(srcR, getIRegA(rM));
18072 assign(srcL, getIRegA(rN));
18073 assign(res, binop(Iop_Add32,
18074 mkexpr(srcL),
18075 unop(isU ? Iop_16Uto32 : Iop_16Sto32,
18076 unop(Iop_32to16,
18077 genROR32(srcR, 8 * rot)))));
18078 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18080 DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
18081 isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
18082 goto decode_success;
18084 /* fall through */
18087 /* ------------------- rev16, rev ------------------ */
18088 if (INSN(27,16) == 0x6BF
18089 && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
18090 Bool isREV = INSN(11,4) == 0xF3;
18091 UInt rM = INSN(3,0);
18092 UInt rD = INSN(15,12);
18093 if (rM != 15 && rD != 15) {
18094 IRTemp rMt = newTemp(Ity_I32);
18095 assign(rMt, getIRegA(rM));
18096 IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
18097 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18098 DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
18099 nCC(INSN_COND), rD, rM);
18100 goto decode_success;
18104 /* ------------------- revsh ----------------------- */
18105 if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
18106 UInt rM = INSN(3,0);
18107 UInt rD = INSN(15,12);
18108 if (rM != 15 && rD != 15) {
18109 IRTemp irt_rM = newTemp(Ity_I32);
18110 IRTemp irt_hi = newTemp(Ity_I32);
18111 IRTemp irt_low = newTemp(Ity_I32);
18112 IRTemp irt_res = newTemp(Ity_I32);
18113 assign(irt_rM, getIRegA(rM));
18114 assign(irt_hi,
18115 binop(Iop_Sar32,
18116 binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
18117 mkU8(16)
18120 assign(irt_low,
18121 binop(Iop_And32,
18122 binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
18123 mkU32(0xFF)
18126 assign(irt_res,
18127 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
18129 putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
18130 DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
18131 goto decode_success;
18135 /* ------------------- rbit ------------------ */
18136 if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
18137 UInt rD = INSN(15,12);
18138 UInt rM = INSN(3,0);
18139 if (rD != 15 && rM != 15) {
18140 IRTemp arg = newTemp(Ity_I32);
18141 assign(arg, getIRegA(rM));
18142 IRTemp res = gen_BITREV(arg);
18143 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18144 DIP("rbit r%u, r%u\n", rD, rM);
18145 goto decode_success;
18149 /* ------------------- smmul ------------------ */
18150 if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
18151 && INSN(15,12) == BITS4(1,1,1,1)
18152 && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
18153 UInt bitR = INSN(5,5);
18154 UInt rD = INSN(19,16);
18155 UInt rM = INSN(11,8);
18156 UInt rN = INSN(3,0);
18157 if (rD != 15 && rM != 15 && rN != 15) {
18158 IRExpr* res
18159 = unop(Iop_64HIto32,
18160 binop(Iop_Add64,
18161 binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
18162 mkU64(bitR ? 0x80000000ULL : 0ULL)));
18163 putIRegA(rD, res, condT, Ijk_Boring);
18164 DIP("smmul%s%s r%u, r%u, r%u\n",
18165 nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
18166 goto decode_success;
18170 /* ------------------- smmla ------------------ */
18171 if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
18172 && INSN(15,12) != BITS4(1,1,1,1)
18173 && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
18174 UInt bitR = INSN(5,5);
18175 UInt rD = INSN(19,16);
18176 UInt rA = INSN(15,12);
18177 UInt rM = INSN(11,8);
18178 UInt rN = INSN(3,0);
18179 if (rD != 15 && rM != 15 && rN != 15) {
18180 IRExpr* res
18181 = unop(Iop_64HIto32,
18182 binop(Iop_Add64,
18183 binop(Iop_Add64,
18184 binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
18185 binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
18186 mkU64(bitR ? 0x80000000ULL : 0ULL)));
18187 putIRegA(rD, res, condT, Ijk_Boring);
18188 DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
18189 nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
18190 goto decode_success;
18194 /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
18195 /* Load Register Unprivileged:
18196 ldrt<c> Rt, [Rn] {, #+/-imm12}
18198 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
18199 UInt rT = INSN(15,12);
18200 UInt rN = INSN(19,16);
18201 UInt imm12 = INSN(11,0);
18202 UInt bU = INSN(23,23);
18203 Bool valid = True;
18204 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18205 if (valid) {
18206 IRTemp newRt = newTemp(Ity_I32);
18207 loadGuardedLE( newRt,
18208 ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
18209 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18210 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18211 getIRegA(rN), mkU32(imm12));
18212 putIRegA(rN, erN, condT, Ijk_Boring);
18213 DIP("ldrt%s r%u, [r%u], #%c%u\n",
18214 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18215 goto decode_success;
18219 /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
18220 /* Load Register Unprivileged:
18221 ldrt<c> Rt, [Rn], +/-Rm{, shift}
18223 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
18224 && INSN(4,4) == 0 ) {
18225 UInt rT = INSN(15,12);
18226 UInt rN = INSN(19,16);
18227 UInt rM = INSN(3,0);
18228 UInt imm5 = INSN(11,7);
18229 UInt bU = INSN(23,23);
18230 UInt type = INSN(6,5);
18231 Bool valid = True;
18232 if (rT == 15 || rN == 15 || rN == rT || rM == 15
18233 /* || (ArchVersion() < 6 && rM == rN) */)
18234 valid = False;
18235 if (valid) {
18236 IRTemp newRt = newTemp(Ity_I32);
18237 loadGuardedLE( newRt,
18238 ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
18239 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18240 // dis_buf generated is slightly bogus, in fact.
18241 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18242 type, imm5, dis_buf);
18243 putIRegA(rN, erN, condT, Ijk_Boring);
18244 DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18245 goto decode_success;
18249 /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
18250 /* Load Register Byte Unprivileged:
18251 ldrbt<c> Rt, [Rn], #+/-imm12
18253 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
18254 UInt rT = INSN(15,12);
18255 UInt rN = INSN(19,16);
18256 UInt imm12 = INSN(11,0);
18257 UInt bU = INSN(23,23);
18258 Bool valid = True;
18259 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18260 if (valid) {
18261 IRTemp newRt = newTemp(Ity_I32);
18262 loadGuardedLE( newRt,
18263 ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
18264 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18265 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18266 getIRegA(rN), mkU32(imm12));
18267 putIRegA(rN, erN, condT, Ijk_Boring);
18268 DIP("ldrbt%s r%u, [r%u], #%c%u\n",
18269 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18270 goto decode_success;
18274 /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
18275 /* Load Register Byte Unprivileged:
18276 ldrbt<c> Rt, [Rn], +/-Rm{, shift}
18278 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
18279 && INSN(4,4) == 0 ) {
18280 UInt rT = INSN(15,12);
18281 UInt rN = INSN(19,16);
18282 UInt rM = INSN(3,0);
18283 UInt imm5 = INSN(11,7);
18284 UInt bU = INSN(23,23);
18285 UInt type = INSN(6,5);
18286 Bool valid = True;
18287 if (rT == 15 || rN == 15 || rN == rT || rM == 15
18288 /* || (ArchVersion() < 6 && rM == rN) */)
18289 valid = False;
18290 if (valid) {
18291 IRTemp newRt = newTemp(Ity_I32);
18292 loadGuardedLE( newRt,
18293 ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
18294 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18295 // dis_buf generated is slightly bogus, in fact.
18296 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18297 type, imm5, dis_buf);
18298 putIRegA(rN, erN, condT, Ijk_Boring);
18299 DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18300 goto decode_success;
18304 /* -------------- (A1) LDRHT reg+#imm8 -------------- */
18305 /* Load Register Halfword Unprivileged:
18306 ldrht<c> Rt, [Rn] {, #+/-imm8}
18308 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18309 && INSN(7,4) == BITS4(1,0,1,1) ) {
18310 UInt rT = INSN(15,12);
18311 UInt rN = INSN(19,16);
18312 UInt bU = INSN(23,23);
18313 UInt imm4H = INSN(11,8);
18314 UInt imm4L = INSN(3,0);
18315 UInt imm8 = (imm4H << 4) | imm4L;
18316 Bool valid = True;
18317 if (rT == 15 || rN == 15 || rN == rT)
18318 valid = False;
18319 if (valid) {
18320 IRTemp newRt = newTemp(Ity_I32);
18321 loadGuardedLE( newRt,
18322 ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
18323 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18324 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18325 getIRegA(rN), mkU32(imm8));
18326 putIRegA(rN, erN, condT, Ijk_Boring);
18327 DIP("ldrht%s r%u, [r%u], #%c%u\n",
18328 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18329 goto decode_success;
18333 /* -------------- (A2) LDRHT reg+/-reg -------------- */
18334 /* Load Register Halfword Unprivileged:
18335 ldrht<c> Rt, [Rn], +/-Rm
18337 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18338 && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
18339 UInt rT = INSN(15,12);
18340 UInt rN = INSN(19,16);
18341 UInt rM = INSN(3,0);
18342 UInt bU = INSN(23,23);
18343 Bool valid = True;
18344 if (rT == 15 || rN == 15 || rN == rT || rM == 15)
18345 valid = False;
18346 if (valid) {
18347 IRTemp newRt = newTemp(Ity_I32);
18348 loadGuardedLE( newRt,
18349 ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
18350 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18351 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18352 getIRegA(rN), getIRegA(rM));
18353 putIRegA(rN, erN, condT, Ijk_Boring);
18354 DIP("ldrht%s r%u, [r%u], %cr%u\n",
18355 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18356 goto decode_success;
18360 /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
18361 /* Load Register Signed Halfword Unprivileged:
18362 ldrsht<c> Rt, [Rn] {, #+/-imm8}
18364 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18365 && INSN(7,4) == BITS4(1,1,1,1)) {
18366 UInt rT = INSN(15,12);
18367 UInt rN = INSN(19,16);
18368 UInt bU = INSN(23,23);
18369 UInt imm4H = INSN(11,8);
18370 UInt imm4L = INSN(3,0);
18371 UInt imm8 = (imm4H << 4) | imm4L;
18372 Bool valid = True;
18373 if (rN == 15 || rT == 15 || rN == rT)
18374 valid = False;
18375 if (valid) {
18376 IRTemp newRt = newTemp(Ity_I32);
18377 loadGuardedLE( newRt,
18378 ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
18379 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18380 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18381 getIRegA(rN), mkU32(imm8));
18382 putIRegA(rN, erN, condT, Ijk_Boring);
18383 DIP("ldrsht%s r%u, [r%u], #%c%u\n",
18384 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18385 goto decode_success;
18389 /* -------------- (A2) LDRSHT reg+/-reg -------------- */
18390 /* Load Register Signed Halfword Unprivileged:
18391 ldrsht<c> Rt, [Rn], +/-Rm
18393 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18394 && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
18395 UInt rT = INSN(15,12);
18396 UInt rN = INSN(19,16);
18397 UInt rM = INSN(3,0);
18398 UInt bU = INSN(23,23);
18399 Bool valid = True;
18400 if (rN == 15 || rT == 15 || rN == rT || rM == 15)
18401 valid = False;
18402 if (valid) {
18403 IRTemp newRt = newTemp(Ity_I32);
18404 loadGuardedLE( newRt,
18405 ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
18406 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18407 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18408 getIRegA(rN), getIRegA(rM));
18409 putIRegA(rN, erN, condT, Ijk_Boring);
18410 DIP("ldrsht%s r%u, [r%u], %cr%u\n",
18411 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18412 goto decode_success;
18416 /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
18417 /* Load Register Signed Byte Unprivileged:
18418 ldrsbt<c> Rt, [Rn] {, #+/-imm8}
18420 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18421 && INSN(7,4) == BITS4(1,1,0,1)) {
18422 UInt rT = INSN(15,12);
18423 UInt rN = INSN(19,16);
18424 UInt bU = INSN(23,23);
18425 UInt imm4H = INSN(11,8);
18426 UInt imm4L = INSN(3,0);
18427 UInt imm8 = (imm4H << 4) | imm4L;
18428 Bool valid = True;
18429 if (rT == 15 || rN == 15 || rN == rT)
18430 valid = False;
18431 if (valid) {
18432 IRTemp newRt = newTemp(Ity_I32);
18433 loadGuardedLE( newRt,
18434 ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
18435 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18436 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18437 getIRegA(rN), mkU32(imm8));
18438 putIRegA(rN, erN, condT, Ijk_Boring);
18439 DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
18440 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18441 goto decode_success;
18445 /* -------------- (A2) LDRSBT reg+/-reg -------------- */
18446 /* Load Register Signed Byte Unprivileged:
18447 ldrsbt<c> Rt, [Rn], +/-Rm
18449 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18450 && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
18451 UInt rT = INSN(15,12);
18452 UInt rN = INSN(19,16);
18453 UInt bU = INSN(23,23);
18454 UInt rM = INSN(3,0);
18455 Bool valid = True;
18456 if (rT == 15 || rN == 15 || rN == rT || rM == 15)
18457 valid = False;
18458 if (valid) {
18459 IRTemp newRt = newTemp(Ity_I32);
18460 loadGuardedLE( newRt,
18461 ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
18462 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18463 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18464 getIRegA(rN), getIRegA(rM));
18465 putIRegA(rN, erN, condT, Ijk_Boring);
18466 DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
18467 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18468 goto decode_success;
18472 /* -------------- (A1) STRBT reg+#imm12 -------------- */
18473 /* Store Register Byte Unprivileged:
18474 strbt<c> Rt, [Rn], #+/-imm12
18476 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
18477 UInt rT = INSN(15,12);
18478 UInt rN = INSN(19,16);
18479 UInt imm12 = INSN(11,0);
18480 UInt bU = INSN(23,23);
18481 Bool valid = True;
18482 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18483 if (valid) {
18484 IRExpr* address = getIRegA(rN);
18485 IRExpr* data = unop(Iop_32to8, getIRegA(rT));
18486 storeGuardedLE( address, data, condT);
18487 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18488 getIRegA(rN), mkU32(imm12));
18489 putIRegA(rN, newRn, condT, Ijk_Boring);
18490 DIP("strbt%s r%u, [r%u], #%c%u\n",
18491 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18492 goto decode_success;
18496 /* -------------- (A2) STRBT reg+/-reg -------------- */
18497 /* Store Register Byte Unprivileged:
18498 strbt<c> Rt, [Rn], +/-Rm{, shift}
18500 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
18501 && INSN(4,4) == 0) {
18502 UInt rT = INSN(15,12);
18503 UInt rN = INSN(19,16);
18504 UInt imm5 = INSN(11,7);
18505 UInt type = INSN(6,5);
18506 UInt rM = INSN(3,0);
18507 UInt bU = INSN(23,23);
18508 Bool valid = True;
18509 if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
18510 if (valid) {
18511 IRExpr* address = getIRegA(rN);
18512 IRExpr* data = unop(Iop_32to8, getIRegA(rT));
18513 storeGuardedLE( address, data, condT);
18514 // dis_buf generated is slightly bogus, in fact.
18515 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18516 type, imm5, dis_buf);
18517 putIRegA(rN, erN, condT, Ijk_Boring);
18518 DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18519 goto decode_success;
18523 /* -------------- (A1) STRHT reg+#imm8 -------------- */
18524 /* Store Register Halfword Unprivileged:
18525 strht<c> Rt, [Rn], #+/-imm8
18527 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
18528 && INSN(7,4) == BITS4(1,0,1,1) ) {
18529 UInt rT = INSN(15,12);
18530 UInt rN = INSN(19,16);
18531 UInt imm4H = INSN(11,8);
18532 UInt imm4L = INSN(3,0);
18533 UInt imm8 = (imm4H << 4) | imm4L;
18534 UInt bU = INSN(23,23);
18535 Bool valid = True;
18536 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18537 if (valid) {
18538 IRExpr* address = getIRegA(rN);
18539 IRExpr* data = unop(Iop_32to16, getIRegA(rT));
18540 storeGuardedLE( address, data, condT);
18541 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18542 getIRegA(rN), mkU32(imm8));
18543 putIRegA(rN, newRn, condT, Ijk_Boring);
18544 DIP("strht%s r%u, [r%u], #%c%u\n",
18545 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18546 goto decode_success;
18550 /* -------------- (A2) STRHT reg+reg -------------- */
18551 /* Store Register Halfword Unprivileged:
18552 strht<c> Rt, [Rn], +/-Rm
18554 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
18555 && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
18556 UInt rT = INSN(15,12);
18557 UInt rN = INSN(19,16);
18558 UInt rM = INSN(3,0);
18559 UInt bU = INSN(23,23);
18560 Bool valid = True;
18561 if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
18562 if (valid) {
18563 IRExpr* address = getIRegA(rN);
18564 IRExpr* data = unop(Iop_32to16, getIRegA(rT));
18565 storeGuardedLE( address, data, condT);
18566 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18567 getIRegA(rN), getIRegA(rM));
18568 putIRegA(rN, newRn, condT, Ijk_Boring);
18569 DIP("strht%s r%u, [r%u], %cr%u\n",
18570 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18571 goto decode_success;
18575 /* -------------- (A1) STRT reg+imm12 -------------- */
18576 /* Store Register Unprivileged:
18577 strt<c> Rt, [Rn], #+/-imm12
18579 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
18580 UInt rT = INSN(15,12);
18581 UInt rN = INSN(19,16);
18582 UInt imm12 = INSN(11,0);
18583 UInt bU = INSN(23,23);
18584 Bool valid = True;
18585 if (rN == 15 || rN == rT) valid = False;
18586 if (valid) {
18587 IRExpr* address = getIRegA(rN);
18588 storeGuardedLE( address, getIRegA(rT), condT);
18589 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18590 getIRegA(rN), mkU32(imm12));
18591 putIRegA(rN, newRn, condT, Ijk_Boring);
18592 DIP("strt%s r%u, [r%u], %c%u\n",
18593 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18594 goto decode_success;
18598 /* -------------- (A2) STRT reg+reg -------------- */
18599 /* Store Register Unprivileged:
18600 strt<c> Rt, [Rn], +/-Rm{, shift}
18602 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
18603 && INSN(4,4) == 0 ) {
18604 UInt rT = INSN(15,12);
18605 UInt rN = INSN(19,16);
18606 UInt rM = INSN(3,0);
18607 UInt type = INSN(6,5);
18608 UInt imm5 = INSN(11,7);
18609 UInt bU = INSN(23,23);
18610 Bool valid = True;
18611 if (rN == 15 || rN == rT || rM == 15) valid = False;
18612 /* FIXME We didn't do:
18613 if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
18614 if (valid) {
18615 storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
18616 // dis_buf generated is slightly bogus, in fact.
18617 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18618 type, imm5, dis_buf);
18619 putIRegA(rN, erN, condT, Ijk_Boring);
18620 DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18621 goto decode_success;
18625 /* ----------------------------------------------------------- */
18626 /* -- ARMv7 instructions -- */
18627 /* ----------------------------------------------------------- */
18629 /* -------------- read CP15 TPIDRURO register ------------- */
18630 /* mrc p15, 0, r0, c13, c0, 3 up to
18631 mrc p15, 0, r14, c13, c0, 3
18633 /* I don't know whether this is really v7-only. But anyway, we
18634 have to support it since arm-linux uses TPIDRURO as a thread
18635 state register. */
18636 if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
18637 UInt rD = INSN(15,12);
18638 if (rD <= 14) {
18639 /* skip r15, that's too stupid to handle */
18640 putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
18641 condT, Ijk_Boring);
18642 DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
18643 goto decode_success;
18645 /* fall through */
18648 /* ------------ read/write CP15 TPIDRURW register ----------- */
18649 /* mcr p15, 0, r0, c13, c0, 2 (r->cr xfer) up to
18650 mcr p15, 0, r14, c13, c0, 2
18652 mrc p15, 0, r0, c13, c0, 2 (rc->r xfer) up to
18653 mrc p15, 0, r14, c13, c0, 2
18655 if (0x0E0D0F50 == (insn & 0x0FFF0FFF)) { // MCR
18656 UInt rS = INSN(15,12);
18657 if (rS <= 14) {
18658 /* skip r15, that's too stupid to handle */
18659 putMiscReg32(OFFB_TPIDRURW, getIRegA(rS), condT);
18660 DIP("mcr%s p15,0, r%u, c13, c0, 2\n", nCC(INSN_COND), rS);
18661 goto decode_success;
18663 /* fall through */
18665 if (0x0E1D0F50 == (insn & 0x0FFF0FFF)) { // MRC
18666 UInt rD = INSN(15,12);
18667 if (rD <= 14) {
18668 /* skip r15, that's too stupid to handle */
18669 putIRegA(rD, IRExpr_Get(OFFB_TPIDRURW, Ity_I32),
18670 condT, Ijk_Boring);
18671 DIP("mrc%s p15,0, r%u, c13, c0, 2\n", nCC(INSN_COND), rD);
18672 goto decode_success;
18674 /* fall through */
18677 /* -------------- read CP15 PMUSRENR register ------------- */
18678 /* mrc p15, 0, r0, c9, c14, 0 up to
18679 mrc p15, 0, r14, c9, c14, 0
18681 /* A program reading this register is really asking "which
18682 performance monitoring registers are available in user space?"
18683 The simple answer here is to return zero, meaning "none". See
18684 #345984. */
18685 if (0x0E190F1E == (insn & 0x0FFF0FFF)) {
18686 UInt rD = INSN(15,12);
18687 if (rD <= 14) {
18688 /* skip r15, that's too stupid to handle */
18689 putIRegA(rD, mkU32(0), condT, Ijk_Boring);
18690 DIP("mrc%s p15,0, r%u, c9, c14, 0\n", nCC(INSN_COND), rD);
18691 goto decode_success;
18693 /* fall through */
18696 /* Handle various kinds of barriers. This is rather indiscriminate
18697 in the sense that they are all turned into an IR Fence, which
18698 means we don't know which they are, so the back end has to
18699 re-emit them all when it comes across an IR Fence.
18701 /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
18702 if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
18703 UInt rT = INSN(15,12);
18704 if (rT <= 14) {
18705 /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7). Data
18706 Memory Barrier -- ensures ordering of memory accesses. */
18707 stmt( IRStmt_MBE(Imbe_Fence) );
18708 DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
18709 goto decode_success;
18711 /* fall through */
18713 /* other flavours of barrier */
18714 switch (insn) {
18715 case 0xEE070F9A: /* v6 */
18716 /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7). Data
18717 Synch Barrier -- ensures completion of memory accesses. */
18718 stmt( IRStmt_MBE(Imbe_Fence) );
18719 DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
18720 goto decode_success;
18721 case 0xEE070F95: /* v6 */
18722 /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
18723 Instruction Synchronisation Barrier (or Flush Prefetch
18724 Buffer) -- a pipe flush, I think. I suspect we could
18725 ignore those, but to be on the safe side emit a fence
18726 anyway. */
18727 stmt( IRStmt_MBE(Imbe_Fence) );
18728 DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
18729 goto decode_success;
18730 default:
18731 break;
18734 /* ----------------------------------------------------------- */
18735 /* -- Hints -- */
18736 /* ----------------------------------------------------------- */
18738 switch (insn & 0x0FFFFFFF) {
18739 /* ------------------- NOP ------------------ */
18740 case 0x0320F000:
18741 DIP("nop%s\n", nCC(INSN_COND));
18742 goto decode_success;
18743 /* ------------------- YIELD ------------------ */
18744 case 0x0320F001:
18745 /* Continue after conditionally yielding. */
18746 DIP("yield%s\n", nCC(INSN_COND));
18747 stmt( IRStmt_Exit( unop(Iop_32to1,
18748 condT == IRTemp_INVALID
18749 ? mkU32(1) : mkexpr(condT)),
18750 Ijk_Yield,
18751 IRConst_U32(guest_R15_curr_instr_notENC + 4),
18752 OFFB_R15T ));
18753 goto decode_success;
18754 default:
18755 break;
18758 /* ----------------------------------------------------------- */
18759 /* -- VFP (CP 10, CP 11) instructions (in ARM mode) -- */
18760 /* ----------------------------------------------------------- */
18762 if (INSN_COND != ARMCondNV) {
18763 Bool ok_vfp = decode_CP10_CP11_instruction (
18764 &dres, INSN(27,0), condT, INSN_COND,
18765 False/*!isT*/
18767 if (ok_vfp)
18768 goto decode_success;
18771 /* ----------------------------------------------------------- */
18772 /* -- NEON instructions (in ARM mode) -- */
18773 /* ----------------------------------------------------------- */
18775 /* These are all in NV space, and so are taken care of (far) above,
18776 by a call from this function to
18777 decode_NV_instruction_ARMv7_and_below(). */
18779 /* ----------------------------------------------------------- */
18780 /* -- v6 media instructions (in ARM mode) -- */
18781 /* ----------------------------------------------------------- */
18783 { Bool ok_v6m = decode_V6MEDIA_instruction(
18784 &dres, INSN(27,0), condT, INSN_COND,
18785 False/*!isT*/
18787 if (ok_v6m)
18788 goto decode_success;
18791 /* ----------------------------------------------------------- */
18792 /* -- v8 instructions (in ARM mode) -- */
18793 /* ----------------------------------------------------------- */
18795 after_v7_decoder:
18797 /* If we get here, it means that all attempts to decode the
18798 instruction as ARMv7 or earlier have failed. So, if we're doing
18799 ARMv8 or later, here is the point to try for it. */
18801 if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
18802 Bool ok_v8
18803 = decode_V8_instruction( &dres, insn, condT, False/*!isT*/,
18804 IRTemp_INVALID, IRTemp_INVALID );
18805 if (ok_v8)
18806 goto decode_success;
18809 /* ----------------------------------------------------------- */
18810 /* -- Undecodable -- */
18811 /* ----------------------------------------------------------- */
18813 goto decode_failure;
18814 /*NOTREACHED*/
18816 decode_failure:
18817 /* All decode failures end up here. */
18818 if (sigill_diag) {
18819 vex_printf("disInstr(arm): unhandled instruction: "
18820 "0x%x\n", insn);
18821 vex_printf(" cond=%d(0x%x) 27:20=%d(0x%02x) "
18822 "4:4=%d "
18823 "3:0=%d(0x%x)\n",
18824 (Int)INSN_COND, (UInt)INSN_COND,
18825 (Int)INSN(27,20), (UInt)INSN(27,20),
18826 (Int)INSN(4,4),
18827 (Int)INSN(3,0), (UInt)INSN(3,0) );
18830 /* Tell the dispatcher that this insn cannot be decoded, and so has
18831 not been executed, and (is currently) the next to be executed.
18832 R15 should be up-to-date since it made so at the start of each
18833 insn, but nevertheless be paranoid and update it again right
18834 now. */
18835 vassert(0 == (guest_R15_curr_instr_notENC & 3));
18836 llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
18837 dres.len = 0;
18838 dres.whatNext = Dis_StopHere;
18839 dres.jk_StopHere = Ijk_NoDecode;
18840 return dres;
18842 decode_success:
18843 /* All decode successes end up here. */
18844 DIP("\n");
18846 vassert(dres.len == 4 || dres.len == 20);
18848 /* Now then. Do we have an implicit jump to r15 to deal with? */
18849 if (r15written) {
18850 /* If we get jump to deal with, we assume that there's been no
18851 other competing branch stuff previously generated for this
18852 insn. That's reasonable, in the sense that the ARM insn set
18853 appears to declare as "Unpredictable" any instruction which
18854 generates more than one possible new value for r15. Hence
18855 just assert. The decoders themselves should check against
18856 all such instructions which are thusly Unpredictable, and
18857 decline to decode them. Hence we should never get here if we
18858 have competing new values for r15, and hence it is safe to
18859 assert here. */
18860 vassert(dres.whatNext == Dis_Continue);
18861 vassert(irsb->next == NULL);
18862 vassert(irsb->jumpkind == Ijk_Boring);
18863 /* If r15 is unconditionally written, terminate the block by
18864 jumping to it. If it's conditionally written, still
18865 terminate the block (a shame, but we can't do side exits to
18866 arbitrary destinations), but first jump to the next
18867 instruction if the condition doesn't hold. */
18868 /* We can't use getIReg(15) to get the destination, since that
18869 will produce r15+8, which isn't what we want. Must use
18870 llGetIReg(15) instead. */
18871 if (r15guard == IRTemp_INVALID) {
18872 /* unconditional */
18873 } else {
18874 /* conditional */
18875 stmt( IRStmt_Exit(
18876 unop(Iop_32to1,
18877 binop(Iop_Xor32,
18878 mkexpr(r15guard), mkU32(1))),
18879 r15kind,
18880 IRConst_U32(guest_R15_curr_instr_notENC + 4),
18881 OFFB_R15T
18884 /* This seems crazy, but we're required to finish the insn with
18885 a write to the guest PC. As usual we rely on ir_opt to tidy
18886 up later. */
18887 llPutIReg(15, llGetIReg(15));
18888 dres.whatNext = Dis_StopHere;
18889 dres.jk_StopHere = r15kind;
18890 } else {
18891 /* Set up the end-state in the normal way. */
18892 switch (dres.whatNext) {
18893 case Dis_Continue:
18894 llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
18895 break;
18896 case Dis_StopHere:
18897 break;
18898 default:
18899 vassert(0);
18903 return dres;
18905 # undef INSN_COND
18906 # undef INSN
18910 /*------------------------------------------------------------*/
18911 /*--- Disassemble a single Thumb2 instruction ---*/
18912 /*------------------------------------------------------------*/
18914 static const UChar it_length_table[256]; /* fwds */
18916 /* NB: in Thumb mode we do fetches of regs with getIRegT, which
18917 automagically adds 4 to fetches of r15. However, writes to regs
18918 are done with putIRegT, which disallows writes to r15. Hence any
18919 r15 writes and associated jumps have to be done "by hand". */
18921 /* Disassemble a single Thumb instruction into IR. The instruction is
18922 located in host memory at guest_instr, and has (decoded) guest IP
18923 of guest_R15_curr_instr_notENC, which will have been set before the
18924 call here. */
18926 static
18927 DisResult disInstr_THUMB_WRK (
18928 const UChar* guest_instr,
18929 const VexArchInfo* archinfo,
18930 const VexAbiInfo* abiinfo,
18931 Bool sigill_diag
18934 /* A macro to fish bits out of insn0. There's also INSN1, to fish
18935 bits out of insn1, but that's defined only after the end of the
18936 16-bit insn decoder, so as to stop it mistakenly being used
18937 therein. */
18938 # define INSN0(_bMax,_bMin) SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
18940 DisResult dres;
18941 UShort insn0; /* first 16 bits of the insn */
18942 UShort insn1; /* second 16 bits of the insn */
18943 HChar dis_buf[128]; // big enough to hold LDMIA etc text
18945 /* Summary result of the ITxxx backwards analysis: False == safe
18946 but suboptimal. */
18947 Bool guaranteedUnconditional = False;
18949 /* Set result defaults. */
18950 dres.whatNext = Dis_Continue;
18951 dres.len = 2;
18952 dres.jk_StopHere = Ijk_INVALID;
18953 dres.hint = Dis_HintNone;
18955 /* Set default actions for post-insn handling of writes to r15, if
18956 required. */
18957 r15written = False;
18958 r15guard = IRTemp_INVALID; /* unconditional */
18959 r15kind = Ijk_Boring;
18961 /* Insns could be 2 or 4 bytes long. Just get the first 16 bits at
18962 this point. If we need the second 16, get them later. We can't
18963 get them both out immediately because it risks a fault (very
18964 unlikely, but ..) if the second 16 bits aren't actually
18965 necessary. */
18966 insn0 = getUShortLittleEndianly( guest_instr );
18967 insn1 = 0; /* We'll get it later, once we know we need it. */
18969 /* Similarly, will set this later. */
18970 IRTemp old_itstate = IRTemp_INVALID;
18972 if (0) vex_printf("insn: 0x%x\n", insn0);
18974 DIP("\t(thumb) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
18976 vassert(0 == (guest_R15_curr_instr_notENC & 1));
18978 /* ----------------------------------------------------------- */
18979 /* Spot "Special" instructions (see comment at top of file). */
18981 const UChar* code = guest_instr;
18982 /* Spot the 16-byte preamble:
18984 ea4f 0cfc mov.w ip, ip, ror #3
18985 ea4f 3c7c mov.w ip, ip, ror #13
18986 ea4f 7c7c mov.w ip, ip, ror #29
18987 ea4f 4cfc mov.w ip, ip, ror #19
18989 UInt word1 = 0x0CFCEA4F;
18990 UInt word2 = 0x3C7CEA4F;
18991 UInt word3 = 0x7C7CEA4F;
18992 UInt word4 = 0x4CFCEA4F;
18993 if (getUIntLittleEndianly(code+ 0) == word1 &&
18994 getUIntLittleEndianly(code+ 4) == word2 &&
18995 getUIntLittleEndianly(code+ 8) == word3 &&
18996 getUIntLittleEndianly(code+12) == word4) {
18997 /* Got a "Special" instruction preamble. Which one is it? */
18998 // 0x 0A 0A EA 4A
18999 if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
19000 /* orr.w r10,r10,r10 */) {
19001 /* R3 = client_request ( R4 ) */
19002 DIP("r3 = client_request ( %%r4 )\n");
19003 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19004 dres.jk_StopHere = Ijk_ClientReq;
19005 dres.whatNext = Dis_StopHere;
19006 goto decode_success;
19008 else
19009 // 0x 0B 0B EA 4B
19010 if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
19011 /* orr r11,r11,r11 */) {
19012 /* R3 = guest_NRADDR */
19013 DIP("r3 = guest_NRADDR\n");
19014 dres.len = 20;
19015 llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
19016 goto decode_success;
19018 else
19019 // 0x 0C 0C EA 4C
19020 if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
19021 /* orr r12,r12,r12 */) {
19022 /* branch-and-link-to-noredir R4 */
19023 DIP("branch-and-link-to-noredir r4\n");
19024 llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19025 llPutIReg(15, getIRegT(4));
19026 dres.jk_StopHere = Ijk_NoRedir;
19027 dres.whatNext = Dis_StopHere;
19028 goto decode_success;
19030 else
19031 // 0x 09 09 EA 49
19032 if (getUIntLittleEndianly(code+16) == 0x0909EA49
19033 /* orr r9,r9,r9 */) {
19034 /* IR injection */
19035 DIP("IR injection\n");
19036 vex_inject_ir(irsb, Iend_LE);
19037 // Invalidate the current insn. The reason is that the IRop we're
19038 // injecting here can change. In which case the translation has to
19039 // be redone. For ease of handling, we simply invalidate all the
19040 // time.
19041 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
19042 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(20)));
19043 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19044 dres.whatNext = Dis_StopHere;
19045 dres.jk_StopHere = Ijk_InvalICache;
19046 goto decode_success;
19048 /* We don't know what it is. Set insn0 so decode_failure
19049 can print the insn following the Special-insn preamble. */
19050 insn0 = getUShortLittleEndianly(code+16);
19051 goto decode_failure;
19052 /*NOTREACHED*/
19057 /* ----------------------------------------------------------- */
19059 /* Main Thumb instruction decoder starts here. It's a series of
19060 switches which examine ever longer bit sequences at the MSB of
19061 the instruction word, first for 16-bit insns, then for 32-bit
19062 insns. */
19064 /* --- BEGIN ITxxx optimisation analysis --- */
19065 /* This is a crucial optimisation for the ITState boilerplate that
19066 follows. Examine the 9 halfwords preceding this instruction,
19067 and if we are absolutely sure that none of them constitute an
19068 'it' instruction, then we can be sure that this instruction is
19069 not under the control of any 'it' instruction, and so
19070 guest_ITSTATE must be zero. So write zero into ITSTATE right
19071 now, so that iropt can fold out almost all of the resulting
19072 junk.
19074 If we aren't sure, we can always safely skip this step. So be a
19075 bit conservative about it: only poke around in the same page as
19076 this instruction, lest we get a fault from the previous page
19077 that would not otherwise have happened. The saving grace is
19078 that such skipping is pretty rare -- it only happens,
19079 statistically, 18/4096ths of the time, so is judged unlikely to
19080 be a performance problems.
19082 FIXME: do better. Take into account the number of insns covered
19083 by any IT insns we find, to rule out cases where an IT clearly
19084 cannot cover this instruction. This would improve behaviour for
19085 branch targets immediately following an IT-guarded group that is
19086 not of full length. Eg, (and completely ignoring issues of 16-
19087 vs 32-bit insn length):
19089 ite cond
19090 insn1
19091 insn2
19092 label: insn3
19093 insn4
19095 The 'it' only conditionalises insn1 and insn2. However, the
19096 current analysis is conservative and considers insn3 and insn4
19097 also possibly guarded. Hence if 'label:' is the start of a hot
19098 loop we will get a big performance hit.
19101 /* Summary result of this analysis: False == safe but
19102 suboptimal. */
19103 vassert(guaranteedUnconditional == False);
19105 UInt pc = guest_R15_curr_instr_notENC;
19106 vassert(0 == (pc & 1));
19108 UInt pageoff = pc & 0xFFF;
19109 if (pageoff >= 18) {
19110 /* It's safe to poke about in the 9 halfwords preceding this
19111 insn. So, have a look at them. */
19112 guaranteedUnconditional = True; /* assume no 'it' insn found,
19113 till we do */
19114 UShort* hwp = (UShort*)(HWord)pc;
19115 Int i;
19116 for (i = -1; i >= -9; i--) {
19117 /* We're in the same page. (True, but commented out due
19118 to expense.) */
19120 vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
19121 == ( pc & 0xFFFFF000 ) );
19123 /* All valid IT instructions must have the form 0xBFxy,
19124 where x can be anything, but y must be nonzero. Find
19125 the number of insns covered by it (1 .. 4) and check to
19126 see if it can possibly reach up to the instruction in
19127 question. Some (x,y) combinations mean UNPREDICTABLE,
19128 and the table is constructed to be conservative by
19129 returning 4 for those cases, so the analysis is safe
19130 even if the code uses unpredictable IT instructions (in
19131 which case its authors are nuts, but hey.) */
19132 UShort hwp_i = hwp[i];
19133 if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
19134 /* might be an 'it' insn. */
19135 /* # guarded insns */
19136 Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
19137 vassert(n_guarded >= 1 && n_guarded <= 4);
19138 if (n_guarded * 2 /* # guarded HWs, worst case */
19139 > (-(i+1))) /* -(i+1): # remaining HWs after the IT */
19140 /* -(i+0) also seems to work, even though I think
19141 it's wrong. I don't understand that. */
19142 guaranteedUnconditional = False;
19143 break;
19148 /* --- END ITxxx optimisation analysis --- */
19150 /* Generate the guarding condition for this insn, by examining
19151 ITSTATE. Assign it to condT. Also, generate new
19152 values for ITSTATE ready for stuffing back into the
19153 guest state, but don't actually do the Put yet, since it will
19154 need to stuffed back in only after the instruction gets to a
19155 point where it is sure to complete. Mostly we let the code at
19156 decode_success handle this, but in cases where the insn contains
19157 a side exit, we have to update them before the exit. */
19159 /* If the ITxxx optimisation analysis above could not prove that
19160 this instruction is guaranteed unconditional, we insert a
19161 lengthy IR preamble to compute the guarding condition at
19162 runtime. If it can prove it (which obviously we hope is the
19163 normal case) then we insert a minimal preamble, which is
19164 equivalent to setting guest_ITSTATE to zero and then folding
19165 that through the full preamble (which completely disappears). */
19167 IRTemp condT = IRTemp_INVALID;
19168 IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
19170 IRTemp new_itstate = IRTemp_INVALID;
19171 vassert(old_itstate == IRTemp_INVALID);
19173 if (guaranteedUnconditional) {
19174 /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
19176 // ITSTATE = 0 :: I32
19177 IRTemp z32 = newTemp(Ity_I32);
19178 assign(z32, mkU32(0));
19179 put_ITSTATE(z32);
19181 // old_itstate = 0 :: I32
19183 // old_itstate = get_ITSTATE();
19184 old_itstate = z32; /* 0 :: I32 */
19186 // new_itstate = old_itstate >> 8
19187 // = 0 >> 8
19188 // = 0 :: I32
19190 // new_itstate = newTemp(Ity_I32);
19191 // assign(new_itstate,
19192 // binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
19193 new_itstate = z32;
19195 // ITSTATE = 0 :: I32(again)
19197 // put_ITSTATE(new_itstate);
19199 // condT1 = calc_cond_dyn( xor(and(old_istate,0xF0), 0xE0) )
19200 // = calc_cond_dyn( xor(0,0xE0) )
19201 // = calc_cond_dyn ( 0xE0 )
19202 // = 1 :: I32
19203 // Not that this matters, since the computed value is not used:
19204 // see condT folding below
19206 // IRTemp condT1 = newTemp(Ity_I32);
19207 // assign(condT1,
19208 // mk_armg_calculate_condition_dyn(
19209 // binop(Iop_Xor32,
19210 // binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
19211 // mkU32(0xE0))
19212 // )
19213 // );
19215 // condT = 32to8(and32(old_itstate,0xF0)) == 0 ? 1 : condT1
19216 // = 32to8(and32(0,0xF0)) == 0 ? 1 : condT1
19217 // = 32to8(0) == 0 ? 1 : condT1
19218 // = 0 == 0 ? 1 : condT1
19219 // = 1
19221 // condT = newTemp(Ity_I32);
19222 // assign(condT, IRExpr_ITE(
19223 // unop(Iop_32to8, binop(Iop_And32,
19224 // mkexpr(old_itstate),
19225 // mkU32(0xF0))),
19226 // mkexpr(condT1),
19227 // mkU32(1))
19228 // ));
19229 condT = newTemp(Ity_I32);
19230 assign(condT, mkU32(1));
19232 // notInITt = xor32(and32(old_itstate, 1), 1)
19233 // = xor32(and32(0, 1), 1)
19234 // = xor32(0, 1)
19235 // = 1 :: I32
19237 // IRTemp notInITt = newTemp(Ity_I32);
19238 // assign(notInITt,
19239 // binop(Iop_Xor32,
19240 // binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
19241 // mkU32(1)));
19243 // cond_AND_notInIT_T = and32(notInITt, condT)
19244 // = and32(1, 1)
19245 // = 1
19247 // cond_AND_notInIT_T = newTemp(Ity_I32);
19248 // assign(cond_AND_notInIT_T,
19249 // binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
19250 cond_AND_notInIT_T = condT; /* 1 :: I32 */
19252 /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
19253 } else {
19254 /* BEGIN { STANDARD PREAMBLE; } */
19256 old_itstate = get_ITSTATE();
19258 new_itstate = newTemp(Ity_I32);
19259 assign(new_itstate,
19260 binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
19262 put_ITSTATE(new_itstate);
19264 /* Same strategy as for ARM insns: generate a condition
19265 temporary at this point (or IRTemp_INVALID, meaning
19266 unconditional). We leave it to lower-level instruction
19267 decoders to decide whether they can generate straight-line
19268 code, or whether they must generate a side exit before the
19269 instruction. condT :: Ity_I32 and is always either zero or
19270 one. */
19271 IRTemp condT1 = newTemp(Ity_I32);
19272 assign(condT1,
19273 mk_armg_calculate_condition_dyn(
19274 binop(Iop_Xor32,
19275 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
19276 mkU32(0xE0))
19280 /* This is a bit complex, but needed to make Memcheck understand
19281 that, if the condition in old_itstate[7:4] denotes AL (that
19282 is, if this instruction is to be executed unconditionally),
19283 then condT does not depend on the results of calling the
19284 helper.
19286 We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
19287 that case set condT directly to 1. Else we use the results
19288 of the helper. Since old_itstate is always defined and
19289 because Memcheck does lazy V-bit propagation through ITE,
19290 this will cause condT to always be a defined 1 if the
19291 condition is 'AL'. From an execution semantics point of view
19292 this is irrelevant since we're merely duplicating part of the
19293 behaviour of the helper. But it makes it clear to Memcheck,
19294 in this case, that condT does not in fact depend on the
19295 contents of the condition code thunk. Without it, we get
19296 quite a lot of false errors.
19298 So, just to clarify: from a straight semantics point of view,
19299 we can simply do "assign(condT, mkexpr(condT1))", and the
19300 simulator still runs fine. It's just that we get loads of
19301 false errors from Memcheck. */
19302 condT = newTemp(Ity_I32);
19303 assign(condT, IRExpr_ITE(
19304 binop(Iop_CmpNE32, binop(Iop_And32,
19305 mkexpr(old_itstate),
19306 mkU32(0xF0)),
19307 mkU32(0)),
19308 mkexpr(condT1),
19309 mkU32(1)
19312 /* Something we don't have in ARM: generate a 0 or 1 value
19313 indicating whether or not we are in an IT block (NB: 0 = in
19314 IT block, 1 = not in IT block). This is used to gate
19315 condition code updates in 16-bit Thumb instructions. */
19316 IRTemp notInITt = newTemp(Ity_I32);
19317 assign(notInITt,
19318 binop(Iop_Xor32,
19319 binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
19320 mkU32(1)));
19322 /* Compute 'condT && notInITt' -- that is, the instruction is
19323 going to execute, and we're not in an IT block. This is the
19324 gating condition for updating condition codes in 16-bit Thumb
19325 instructions, except for CMP, CMN and TST. */
19326 cond_AND_notInIT_T = newTemp(Ity_I32);
19327 assign(cond_AND_notInIT_T,
19328 binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
19329 /* END { STANDARD PREAMBLE; } */
19333 /* At this point:
19334 * ITSTATE has been updated
19335 * condT holds the guarding condition for this instruction (0 or 1),
19336 * notInITt is 1 if we're in "normal" code, 0 if in an IT block
19337 * cond_AND_notInIT_T is the AND of the above two.
19339 If the instruction proper can't trap, then there's nothing else
19340 to do w.r.t. ITSTATE -- just go and and generate IR for the
19341 insn, taking into account the guarding condition.
19343 If, however, the instruction might trap, then we must back up
19344 ITSTATE to the old value, and re-update it after the potentially
19345 trapping IR section. A trap can happen either via a memory
19346 reference or because we need to throw SIGILL.
19348 If an instruction has a side exit, we need to be sure that any
19349 ITSTATE backup is re-updated before the side exit.
19352 /* ----------------------------------------------------------- */
19353 /* -- -- */
19354 /* -- Thumb 16-bit integer instructions -- */
19355 /* -- -- */
19356 /* -- IMPORTANT: references to insn1 or INSN1 are -- */
19357 /* -- not allowed in this section -- */
19358 /* -- -- */
19359 /* ----------------------------------------------------------- */
19361 /* 16-bit instructions inside an IT block, apart from CMP, CMN and
19362 TST, do not set the condition codes. Hence we must dynamically
19363 test for this case for every condition code update. */
19365 IROp anOp = Iop_INVALID;
19366 const HChar* anOpNm = NULL;
19368 /* ================ 16-bit 15:6 cases ================ */
19370 switch (INSN0(15,6)) {
19372 case 0x10a: // CMP
19373 case 0x10b: { // CMN
19374 /* ---------------- CMP Rn, Rm ---------------- */
19375 Bool isCMN = INSN0(15,6) == 0x10b;
19376 UInt rN = INSN0(2,0);
19377 UInt rM = INSN0(5,3);
19378 IRTemp argL = newTemp(Ity_I32);
19379 IRTemp argR = newTemp(Ity_I32);
19380 assign( argL, getIRegT(rN) );
19381 assign( argR, getIRegT(rM) );
19382 /* Update flags regardless of whether in an IT block or not. */
19383 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
19384 argL, argR, condT );
19385 DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
19386 goto decode_success;
19389 case 0x108: {
19390 /* ---------------- TST Rn, Rm ---------------- */
19391 UInt rN = INSN0(2,0);
19392 UInt rM = INSN0(5,3);
19393 IRTemp oldC = newTemp(Ity_I32);
19394 IRTemp oldV = newTemp(Ity_I32);
19395 IRTemp res = newTemp(Ity_I32);
19396 assign( oldC, mk_armg_calculate_flag_c() );
19397 assign( oldV, mk_armg_calculate_flag_v() );
19398 assign( res, binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
19399 /* Update flags regardless of whether in an IT block or not. */
19400 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
19401 DIP("tst r%u, r%u\n", rN, rM);
19402 goto decode_success;
19405 case 0x109: {
19406 /* ---------------- NEGS Rd, Rm ---------------- */
19407 /* Rd = -Rm */
19408 UInt rM = INSN0(5,3);
19409 UInt rD = INSN0(2,0);
19410 IRTemp arg = newTemp(Ity_I32);
19411 IRTemp zero = newTemp(Ity_I32);
19412 assign(arg, getIRegT(rM));
19413 assign(zero, mkU32(0));
19414 // rD can never be r15
19415 putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
19416 setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
19417 DIP("negs r%u, r%u\n", rD, rM);
19418 goto decode_success;
19421 case 0x10F: {
19422 /* ---------------- MVNS Rd, Rm ---------------- */
19423 /* Rd = ~Rm */
19424 UInt rM = INSN0(5,3);
19425 UInt rD = INSN0(2,0);
19426 IRTemp oldV = newTemp(Ity_I32);
19427 IRTemp oldC = newTemp(Ity_I32);
19428 IRTemp res = newTemp(Ity_I32);
19429 assign( oldV, mk_armg_calculate_flag_v() );
19430 assign( oldC, mk_armg_calculate_flag_c() );
19431 assign(res, unop(Iop_Not32, getIRegT(rM)));
19432 // rD can never be r15
19433 putIRegT(rD, mkexpr(res), condT);
19434 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19435 cond_AND_notInIT_T );
19436 DIP("mvns r%u, r%u\n", rD, rM);
19437 goto decode_success;
19440 case 0x10C:
19441 /* ---------------- ORRS Rd, Rm ---------------- */
19442 anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
19443 case 0x100:
19444 /* ---------------- ANDS Rd, Rm ---------------- */
19445 anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
19446 case 0x101:
19447 /* ---------------- EORS Rd, Rm ---------------- */
19448 anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
19449 case 0x10d:
19450 /* ---------------- MULS Rd, Rm ---------------- */
19451 anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
19452 and_orr_eor_mul: {
19453 /* Rd = Rd `op` Rm */
19454 UInt rM = INSN0(5,3);
19455 UInt rD = INSN0(2,0);
19456 IRTemp res = newTemp(Ity_I32);
19457 IRTemp oldV = newTemp(Ity_I32);
19458 IRTemp oldC = newTemp(Ity_I32);
19459 assign( oldV, mk_armg_calculate_flag_v() );
19460 assign( oldC, mk_armg_calculate_flag_c() );
19461 assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
19462 // not safe to read guest state after here
19463 // rD can never be r15
19464 putIRegT(rD, mkexpr(res), condT);
19465 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19466 cond_AND_notInIT_T );
19467 DIP("%s r%u, r%u\n", anOpNm, rD, rM);
19468 goto decode_success;
19471 case 0x10E: {
19472 /* ---------------- BICS Rd, Rm ---------------- */
19473 /* Rd = Rd & ~Rm */
19474 UInt rM = INSN0(5,3);
19475 UInt rD = INSN0(2,0);
19476 IRTemp res = newTemp(Ity_I32);
19477 IRTemp oldV = newTemp(Ity_I32);
19478 IRTemp oldC = newTemp(Ity_I32);
19479 assign( oldV, mk_armg_calculate_flag_v() );
19480 assign( oldC, mk_armg_calculate_flag_c() );
19481 assign( res, binop(Iop_And32, getIRegT(rD),
19482 unop(Iop_Not32, getIRegT(rM) )));
19483 // not safe to read guest state after here
19484 // rD can never be r15
19485 putIRegT(rD, mkexpr(res), condT);
19486 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19487 cond_AND_notInIT_T );
19488 DIP("bics r%u, r%u\n", rD, rM);
19489 goto decode_success;
19492 case 0x105: {
19493 /* ---------------- ADCS Rd, Rm ---------------- */
19494 /* Rd = Rd + Rm + oldC */
19495 UInt rM = INSN0(5,3);
19496 UInt rD = INSN0(2,0);
19497 IRTemp argL = newTemp(Ity_I32);
19498 IRTemp argR = newTemp(Ity_I32);
19499 IRTemp oldC = newTemp(Ity_I32);
19500 IRTemp res = newTemp(Ity_I32);
19501 assign(argL, getIRegT(rD));
19502 assign(argR, getIRegT(rM));
19503 assign(oldC, mk_armg_calculate_flag_c());
19504 assign(res, binop(Iop_Add32,
19505 binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19506 mkexpr(oldC)));
19507 // rD can never be r15
19508 putIRegT(rD, mkexpr(res), condT);
19509 setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
19510 cond_AND_notInIT_T );
19511 DIP("adcs r%u, r%u\n", rD, rM);
19512 goto decode_success;
19515 case 0x106: {
19516 /* ---------------- SBCS Rd, Rm ---------------- */
19517 /* Rd = Rd - Rm - (oldC ^ 1) */
19518 UInt rM = INSN0(5,3);
19519 UInt rD = INSN0(2,0);
19520 IRTemp argL = newTemp(Ity_I32);
19521 IRTemp argR = newTemp(Ity_I32);
19522 IRTemp oldC = newTemp(Ity_I32);
19523 IRTemp res = newTemp(Ity_I32);
19524 assign(argL, getIRegT(rD));
19525 assign(argR, getIRegT(rM));
19526 assign(oldC, mk_armg_calculate_flag_c());
19527 assign(res, binop(Iop_Sub32,
19528 binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19529 binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
19530 // rD can never be r15
19531 putIRegT(rD, mkexpr(res), condT);
19532 setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
19533 cond_AND_notInIT_T );
19534 DIP("sbcs r%u, r%u\n", rD, rM);
19535 goto decode_success;
19538 case 0x2CB: {
19539 /* ---------------- UXTB Rd, Rm ---------------- */
19540 /* Rd = 8Uto32(Rm) */
19541 UInt rM = INSN0(5,3);
19542 UInt rD = INSN0(2,0);
19543 putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
19544 condT);
19545 DIP("uxtb r%u, r%u\n", rD, rM);
19546 goto decode_success;
19549 case 0x2C9: {
19550 /* ---------------- SXTB Rd, Rm ---------------- */
19551 /* Rd = 8Sto32(Rm) */
19552 UInt rM = INSN0(5,3);
19553 UInt rD = INSN0(2,0);
19554 putIRegT(rD, binop(Iop_Sar32,
19555 binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
19556 mkU8(24)),
19557 condT);
19558 DIP("sxtb r%u, r%u\n", rD, rM);
19559 goto decode_success;
19562 case 0x2CA: {
19563 /* ---------------- UXTH Rd, Rm ---------------- */
19564 /* Rd = 16Uto32(Rm) */
19565 UInt rM = INSN0(5,3);
19566 UInt rD = INSN0(2,0);
19567 putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
19568 condT);
19569 DIP("uxth r%u, r%u\n", rD, rM);
19570 goto decode_success;
19573 case 0x2C8: {
19574 /* ---------------- SXTH Rd, Rm ---------------- */
19575 /* Rd = 16Sto32(Rm) */
19576 UInt rM = INSN0(5,3);
19577 UInt rD = INSN0(2,0);
19578 putIRegT(rD, binop(Iop_Sar32,
19579 binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
19580 mkU8(16)),
19581 condT);
19582 DIP("sxth r%u, r%u\n", rD, rM);
19583 goto decode_success;
19586 case 0x102: // LSLS
19587 case 0x103: // LSRS
19588 case 0x104: // ASRS
19589 case 0x107: { // RORS
19590 /* ---------------- LSLS Rs, Rd ---------------- */
19591 /* ---------------- LSRS Rs, Rd ---------------- */
19592 /* ---------------- ASRS Rs, Rd ---------------- */
19593 /* ---------------- RORS Rs, Rd ---------------- */
19594 /* Rd = Rd `op` Rs, and set flags */
19595 UInt rS = INSN0(5,3);
19596 UInt rD = INSN0(2,0);
19597 IRTemp oldV = newTemp(Ity_I32);
19598 IRTemp rDt = newTemp(Ity_I32);
19599 IRTemp rSt = newTemp(Ity_I32);
19600 IRTemp res = newTemp(Ity_I32);
19601 IRTemp resC = newTemp(Ity_I32);
19602 const HChar* wot = "???";
19603 assign(rSt, getIRegT(rS));
19604 assign(rDt, getIRegT(rD));
19605 assign(oldV, mk_armg_calculate_flag_v());
19606 /* Does not appear to be the standard 'how' encoding. */
19607 switch (INSN0(15,6)) {
19608 case 0x102:
19609 compute_result_and_C_after_LSL_by_reg(
19610 dis_buf, &res, &resC, rDt, rSt, rD, rS
19612 wot = "lsl";
19613 break;
19614 case 0x103:
19615 compute_result_and_C_after_LSR_by_reg(
19616 dis_buf, &res, &resC, rDt, rSt, rD, rS
19618 wot = "lsr";
19619 break;
19620 case 0x104:
19621 compute_result_and_C_after_ASR_by_reg(
19622 dis_buf, &res, &resC, rDt, rSt, rD, rS
19624 wot = "asr";
19625 break;
19626 case 0x107:
19627 compute_result_and_C_after_ROR_by_reg(
19628 dis_buf, &res, &resC, rDt, rSt, rD, rS
19630 wot = "ror";
19631 break;
19632 default:
19633 /*NOTREACHED*/vassert(0);
19635 // not safe to read guest state after this point
19636 putIRegT(rD, mkexpr(res), condT);
19637 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
19638 cond_AND_notInIT_T );
19639 DIP("%ss r%u, r%u\n", wot, rS, rD);
19640 goto decode_success;
19643 case 0x2E8: // REV
19644 case 0x2E9: { // REV16
19645 /* ---------------- REV Rd, Rm ---------------- */
19646 /* ---------------- REV16 Rd, Rm ---------------- */
19647 UInt rM = INSN0(5,3);
19648 UInt rD = INSN0(2,0);
19649 Bool isREV = INSN0(15,6) == 0x2E8;
19650 IRTemp arg = newTemp(Ity_I32);
19651 assign(arg, getIRegT(rM));
19652 IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
19653 putIRegT(rD, mkexpr(res), condT);
19654 DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
19655 goto decode_success;
19658 case 0x2EB: { // REVSH
19659 /* ---------------- REVSH Rd, Rn ---------------- */
19660 UInt rM = INSN0(5,3);
19661 UInt rD = INSN0(2,0);
19662 IRTemp irt_rM = newTemp(Ity_I32);
19663 IRTemp irt_hi = newTemp(Ity_I32);
19664 IRTemp irt_low = newTemp(Ity_I32);
19665 IRTemp irt_res = newTemp(Ity_I32);
19666 assign(irt_rM, getIRegT(rM));
19667 assign(irt_hi,
19668 binop(Iop_Sar32,
19669 binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
19670 mkU8(16)
19673 assign(irt_low,
19674 binop(Iop_And32,
19675 binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
19676 mkU32(0xFF)
19679 assign(irt_res,
19680 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
19682 putIRegT(rD, mkexpr(irt_res), condT);
19683 DIP("revsh r%u, r%u\n", rD, rM);
19684 goto decode_success;
19687 default:
19688 break; /* examine the next shortest prefix */
19693 /* ================ 16-bit 15:7 cases ================ */
19695 switch (INSN0(15,7)) {
19697 case BITS9(1,0,1,1,0,0,0,0,0): {
19698 /* ------------ ADD SP, #imm7 * 4 ------------ */
19699 UInt uimm7 = INSN0(6,0);
19700 putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
19701 condT);
19702 DIP("add sp, #%u\n", uimm7 * 4);
19703 goto decode_success;
19706 case BITS9(1,0,1,1,0,0,0,0,1): {
19707 /* ------------ SUB SP, #imm7 * 4 ------------ */
19708 UInt uimm7 = INSN0(6,0);
19709 putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
19710 condT);
19711 DIP("sub sp, #%u\n", uimm7 * 4);
19712 goto decode_success;
19715 case BITS9(0,1,0,0,0,1,1,1,0): {
19716 /* ---------------- BX rM ---------------- */
19717 /* Branch to reg, and optionally switch modes. Reg contains a
19718 suitably encoded address therefore (w CPSR.T at the bottom).
19719 Have to special-case r15, as usual. */
19720 UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
19721 if (BITS3(0,0,0) == INSN0(2,0)) {
19722 IRTemp dst = newTemp(Ity_I32);
19723 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19724 mk_skip_over_T16_if_cond_is_false(condT);
19725 condT = IRTemp_INVALID;
19726 // now uncond
19727 if (rM <= 14) {
19728 assign( dst, getIRegT(rM) );
19729 } else {
19730 vassert(rM == 15);
19731 assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
19733 llPutIReg(15, mkexpr(dst));
19734 dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
19735 dres.whatNext = Dis_StopHere;
19736 DIP("bx r%u (possibly switch to ARM mode)\n", rM);
19737 goto decode_success;
19739 break;
19742 /* ---------------- BLX rM ---------------- */
19743 /* Branch and link to interworking address in rM. */
19744 case BITS9(0,1,0,0,0,1,1,1,1): {
19745 if (BITS3(0,0,0) == INSN0(2,0)) {
19746 UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
19747 IRTemp dst = newTemp(Ity_I32);
19748 if (rM <= 14) {
19749 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19750 mk_skip_over_T16_if_cond_is_false(condT);
19751 condT = IRTemp_INVALID;
19752 // now uncond
19753 /* We're returning to Thumb code, hence "| 1" */
19754 assign( dst, getIRegT(rM) );
19755 putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
19756 IRTemp_INVALID );
19757 llPutIReg(15, mkexpr(dst));
19758 dres.jk_StopHere = Ijk_Call;
19759 dres.whatNext = Dis_StopHere;
19760 DIP("blx r%u (possibly switch to ARM mode)\n", rM);
19761 goto decode_success;
19763 /* else unpredictable, fall through */
19765 break;
19768 default:
19769 break; /* examine the next shortest prefix */
19774 /* ================ 16-bit 15:8 cases ================ */
19776 switch (INSN0(15,8)) {
19778 case BITS8(1,1,0,1,1,1,1,1): {
19779 /* ---------------- SVC ---------------- */
19780 UInt imm8 = INSN0(7,0);
19781 if (imm8 == 0) {
19782 /* A syscall. We can't do this conditionally, hence: */
19783 mk_skip_over_T16_if_cond_is_false( condT );
19784 // FIXME: what if we have to back up and restart this insn?
19785 // then ITSTATE will be wrong (we'll have it as "used")
19786 // when it isn't. Correct is to save ITSTATE in a
19787 // stash pseudo-reg, and back up from that if we have to
19788 // restart.
19789 // uncond after here
19790 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
19791 dres.jk_StopHere = Ijk_Sys_syscall;
19792 dres.whatNext = Dis_StopHere;
19793 DIP("svc #0x%08x\n", imm8);
19794 goto decode_success;
19796 /* else fall through */
19797 break;
19800 case BITS8(0,1,0,0,0,1,0,0): {
19801 /* ---------------- ADD(HI) Rd, Rm ---------------- */
19802 UInt h1 = INSN0(7,7);
19803 UInt h2 = INSN0(6,6);
19804 UInt rM = (h2 << 3) | INSN0(5,3);
19805 UInt rD = (h1 << 3) | INSN0(2,0);
19806 //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
19807 if (rD == 15 && rM == 15) {
19808 // then it's invalid
19809 } else {
19810 IRTemp res = newTemp(Ity_I32);
19811 assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
19812 if (rD != 15) {
19813 putIRegT( rD, mkexpr(res), condT );
19814 } else {
19815 /* Only allowed outside or last-in IT block; SIGILL if not so. */
19816 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19817 /* jump over insn if not selected */
19818 mk_skip_over_T16_if_cond_is_false(condT);
19819 condT = IRTemp_INVALID;
19820 // now uncond
19821 /* non-interworking branch */
19822 llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
19823 dres.jk_StopHere = Ijk_Boring;
19824 dres.whatNext = Dis_StopHere;
19826 DIP("add(hi) r%u, r%u\n", rD, rM);
19827 goto decode_success;
19829 break;
19832 case BITS8(0,1,0,0,0,1,0,1): {
19833 /* ---------------- CMP(HI) Rd, Rm ---------------- */
19834 UInt h1 = INSN0(7,7);
19835 UInt h2 = INSN0(6,6);
19836 UInt rM = (h2 << 3) | INSN0(5,3);
19837 UInt rN = (h1 << 3) | INSN0(2,0);
19838 if (h1 != 0 || h2 != 0) {
19839 IRTemp argL = newTemp(Ity_I32);
19840 IRTemp argR = newTemp(Ity_I32);
19841 assign( argL, getIRegT(rN) );
19842 assign( argR, getIRegT(rM) );
19843 /* Update flags regardless of whether in an IT block or not. */
19844 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19845 DIP("cmphi r%u, r%u\n", rN, rM);
19846 goto decode_success;
19848 break;
19851 case BITS8(0,1,0,0,0,1,1,0): {
19852 /* ---------------- MOV(HI) Rd, Rm ---------------- */
19853 UInt h1 = INSN0(7,7);
19854 UInt h2 = INSN0(6,6);
19855 UInt rM = (h2 << 3) | INSN0(5,3);
19856 UInt rD = (h1 << 3) | INSN0(2,0);
19857 /* The old ARM ARM seems to disallow the case where both Rd and
19858 Rm are "low" registers, but newer versions allow it. */
19859 if (1 /*h1 != 0 || h2 != 0*/) {
19860 IRTemp val = newTemp(Ity_I32);
19861 assign( val, getIRegT(rM) );
19862 if (rD != 15) {
19863 putIRegT( rD, mkexpr(val), condT );
19864 } else {
19865 /* Only allowed outside or last-in IT block; SIGILL if not so. */
19866 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19867 /* jump over insn if not selected */
19868 mk_skip_over_T16_if_cond_is_false(condT);
19869 condT = IRTemp_INVALID;
19870 // now uncond
19871 /* non-interworking branch */
19872 llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
19873 dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
19874 dres.whatNext = Dis_StopHere;
19876 DIP("mov r%u, r%u\n", rD, rM);
19877 goto decode_success;
19879 break;
19882 case BITS8(1,0,1,1,1,1,1,1): {
19883 /* ---------------- IT (if-then) ---------------- */
19884 UInt firstcond = INSN0(7,4);
19885 UInt mask = INSN0(3,0);
19886 UInt newITSTATE = 0;
19887 /* This is the ITSTATE represented as described in
19888 libvex_guest_arm.h. It is not the ARM ARM representation. */
19889 HChar c1 = '.';
19890 HChar c2 = '.';
19891 HChar c3 = '.';
19892 Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
19893 firstcond, mask );
19894 if (valid && firstcond != 0xF/*NV*/) {
19895 /* Not allowed in an IT block; SIGILL if so. */
19896 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19898 IRTemp t = newTemp(Ity_I32);
19899 assign(t, mkU32(newITSTATE));
19900 put_ITSTATE(t);
19902 DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
19903 goto decode_success;
19905 break;
19908 case BITS8(1,0,1,1,0,0,0,1):
19909 case BITS8(1,0,1,1,0,0,1,1):
19910 case BITS8(1,0,1,1,1,0,0,1):
19911 case BITS8(1,0,1,1,1,0,1,1): {
19912 /* ---------------- CB{N}Z ---------------- */
19913 UInt rN = INSN0(2,0);
19914 UInt bOP = INSN0(11,11);
19915 UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
19916 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19917 /* It's a conditional branch forward. */
19918 IRTemp kond = newTemp(Ity_I1);
19919 assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
19920 getIRegT(rN), mkU32(0)) );
19922 vassert(0 == (guest_R15_curr_instr_notENC & 1));
19923 /* Looks like the nearest insn we can branch to is the one after
19924 next. That makes sense, as there's no point in being able to
19925 encode a conditional branch to the next instruction. */
19926 UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
19927 stmt(IRStmt_Exit( mkexpr(kond),
19928 Ijk_Boring,
19929 IRConst_U32(toUInt(dst)),
19930 OFFB_R15T ));
19931 DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
19932 goto decode_success;
19935 default:
19936 break; /* examine the next shortest prefix */
19941 /* ================ 16-bit 15:9 cases ================ */
19943 switch (INSN0(15,9)) {
19945 case BITS7(1,0,1,1,0,1,0): {
19946 /* ---------------- PUSH ---------------- */
19947 /* This is a bit like STMxx, but way simpler. Complications we
19948 don't have to deal with:
19949 * SP being one of the transferred registers
19950 * direction (increment vs decrement)
19951 * before-vs-after-ness
19953 Int i, nRegs;
19954 UInt bitR = INSN0(8,8);
19955 UInt regList = INSN0(7,0);
19956 if (bitR) regList |= (1 << 14);
19958 /* At least one register must be transferred, else result is
19959 UNPREDICTABLE. */
19960 if (regList != 0) {
19961 /* Since we can't generate a guaranteed non-trapping IR
19962 sequence, (1) jump over the insn if it is gated false, and
19963 (2) back out the ITSTATE update. */
19964 mk_skip_over_T16_if_cond_is_false(condT);
19965 condT = IRTemp_INVALID;
19966 put_ITSTATE(old_itstate);
19967 // now uncond
19969 nRegs = 0;
19970 for (i = 0; i < 16; i++) {
19971 if ((regList & (1 << i)) != 0)
19972 nRegs++;
19974 vassert(nRegs >= 1 && nRegs <= 9);
19976 /* Move SP down first of all, so we're "covered". And don't
19977 mess with its alignment. */
19978 IRTemp newSP = newTemp(Ity_I32);
19979 assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
19980 putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
19982 /* Generate a transfer base address as a forced-aligned
19983 version of the final SP value. */
19984 IRTemp base = newTemp(Ity_I32);
19985 assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
19987 /* Now the transfers */
19988 nRegs = 0;
19989 for (i = 0; i < 16; i++) {
19990 if ((regList & (1 << i)) != 0) {
19991 storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
19992 getIRegT(i) );
19993 nRegs++;
19997 /* Reinstate the ITSTATE update. */
19998 put_ITSTATE(new_itstate);
20000 DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
20001 goto decode_success;
20003 break;
20006 case BITS7(1,0,1,1,1,1,0): {
20007 /* ---------------- POP ---------------- */
20008 Int i, nRegs;
20009 UInt bitR = INSN0(8,8);
20010 UInt regList = INSN0(7,0);
20012 /* At least one register must be transferred, else result is
20013 UNPREDICTABLE. */
20014 if (regList != 0 || bitR) {
20015 /* Since we can't generate a guaranteed non-trapping IR
20016 sequence, (1) jump over the insn if it is gated false, and
20017 (2) back out the ITSTATE update. */
20018 mk_skip_over_T16_if_cond_is_false(condT);
20019 condT = IRTemp_INVALID;
20020 put_ITSTATE(old_itstate);
20021 // now uncond
20023 nRegs = 0;
20024 for (i = 0; i < 8; i++) {
20025 if ((regList & (1 << i)) != 0)
20026 nRegs++;
20028 vassert(nRegs >= 0 && nRegs <= 8);
20029 vassert(bitR == 0 || bitR == 1);
20031 IRTemp oldSP = newTemp(Ity_I32);
20032 assign(oldSP, getIRegT(13));
20034 /* Generate a transfer base address as a forced-aligned
20035 version of the original SP value. */
20036 IRTemp base = newTemp(Ity_I32);
20037 assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
20039 /* Compute a new value for SP, but don't install it yet, so
20040 that we're "covered" until all the transfers are done.
20041 And don't mess with its alignment. */
20042 IRTemp newSP = newTemp(Ity_I32);
20043 assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
20044 mkU32(4 * (nRegs + bitR))));
20046 /* Now the transfers, not including PC */
20047 nRegs = 0;
20048 for (i = 0; i < 8; i++) {
20049 if ((regList & (1 << i)) != 0) {
20050 putIRegT(i, loadLE( Ity_I32,
20051 binop(Iop_Add32, mkexpr(base),
20052 mkU32(4 * nRegs))),
20053 IRTemp_INVALID );
20054 nRegs++;
20058 IRTemp newPC = IRTemp_INVALID;
20059 if (bitR) {
20060 newPC = newTemp(Ity_I32);
20061 assign( newPC, loadLE( Ity_I32,
20062 binop(Iop_Add32, mkexpr(base),
20063 mkU32(4 * nRegs))));
20066 /* Now we can safely install the new SP value */
20067 putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
20069 /* Reinstate the ITSTATE update. */
20070 put_ITSTATE(new_itstate);
20072 /* now, do we also have to do a branch? If so, it turns out
20073 that the new PC value is encoded exactly as we need it to
20074 be -- with CPSR.T in the bottom bit. So we can simply use
20075 it as is, no need to mess with it. Note, therefore, this
20076 is an interworking return. */
20077 if (bitR) {
20078 llPutIReg(15, mkexpr(newPC));
20079 dres.jk_StopHere = Ijk_Ret;
20080 dres.whatNext = Dis_StopHere;
20083 DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
20084 goto decode_success;
20086 break;
20089 case BITS7(0,0,0,1,1,1,0): /* ADDS */
20090 case BITS7(0,0,0,1,1,1,1): { /* SUBS */
20091 /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
20092 /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
20093 UInt uimm3 = INSN0(8,6);
20094 UInt rN = INSN0(5,3);
20095 UInt rD = INSN0(2,0);
20096 UInt isSub = INSN0(9,9);
20097 IRTemp argL = newTemp(Ity_I32);
20098 IRTemp argR = newTemp(Ity_I32);
20099 assign( argL, getIRegT(rN) );
20100 assign( argR, mkU32(uimm3) );
20101 putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
20102 mkexpr(argL), mkexpr(argR)),
20103 condT);
20104 setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20105 argL, argR, cond_AND_notInIT_T );
20106 DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
20107 goto decode_success;
20110 case BITS7(0,0,0,1,1,0,0): /* ADDS */
20111 case BITS7(0,0,0,1,1,0,1): { /* SUBS */
20112 /* ---------------- ADDS Rd, Rn, Rm ---------------- */
20113 /* ---------------- SUBS Rd, Rn, Rm ---------------- */
20114 UInt rM = INSN0(8,6);
20115 UInt rN = INSN0(5,3);
20116 UInt rD = INSN0(2,0);
20117 UInt isSub = INSN0(9,9);
20118 IRTemp argL = newTemp(Ity_I32);
20119 IRTemp argR = newTemp(Ity_I32);
20120 assign( argL, getIRegT(rN) );
20121 assign( argR, getIRegT(rM) );
20122 putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
20123 mkexpr(argL), mkexpr(argR)),
20124 condT );
20125 setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20126 argL, argR, cond_AND_notInIT_T );
20127 DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
20128 goto decode_success;
20131 case BITS7(0,1,0,1,0,0,0): /* STR */
20132 case BITS7(0,1,0,1,1,0,0): { /* LDR */
20133 /* ------------- LDR Rd, [Rn, Rm] ------------- */
20134 /* ------------- STR Rd, [Rn, Rm] ------------- */
20135 /* LDR/STR Rd, [Rn + Rm] */
20136 UInt rD = INSN0(2,0);
20137 UInt rN = INSN0(5,3);
20138 UInt rM = INSN0(8,6);
20139 UInt isLD = INSN0(11,11);
20141 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20142 put_ITSTATE(old_itstate); // backout
20143 if (isLD) {
20144 IRTemp tD = newTemp(Ity_I32);
20145 loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20146 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20147 } else {
20148 storeGuardedLE(ea, getIRegT(rD), condT);
20150 put_ITSTATE(new_itstate); // restore
20152 DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20153 goto decode_success;
20156 case BITS7(0,1,0,1,0,0,1):
20157 case BITS7(0,1,0,1,1,0,1): {
20158 /* ------------- LDRH Rd, [Rn, Rm] ------------- */
20159 /* ------------- STRH Rd, [Rn, Rm] ------------- */
20160 /* LDRH/STRH Rd, [Rn + Rm] */
20161 UInt rD = INSN0(2,0);
20162 UInt rN = INSN0(5,3);
20163 UInt rM = INSN0(8,6);
20164 UInt isLD = INSN0(11,11);
20166 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20167 put_ITSTATE(old_itstate); // backout
20168 if (isLD) {
20169 IRTemp tD = newTemp(Ity_I32);
20170 loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
20171 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20172 } else {
20173 storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
20175 put_ITSTATE(new_itstate); // restore
20177 DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20178 goto decode_success;
20181 case BITS7(0,1,0,1,1,1,1): {
20182 /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
20183 /* LDRSH Rd, [Rn + Rm] */
20184 UInt rD = INSN0(2,0);
20185 UInt rN = INSN0(5,3);
20186 UInt rM = INSN0(8,6);
20188 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20189 put_ITSTATE(old_itstate); // backout
20190 IRTemp tD = newTemp(Ity_I32);
20191 loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
20192 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20193 put_ITSTATE(new_itstate); // restore
20195 DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
20196 goto decode_success;
20199 case BITS7(0,1,0,1,0,1,1): {
20200 /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
20201 /* LDRSB Rd, [Rn + Rm] */
20202 UInt rD = INSN0(2,0);
20203 UInt rN = INSN0(5,3);
20204 UInt rM = INSN0(8,6);
20206 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20207 put_ITSTATE(old_itstate); // backout
20208 IRTemp tD = newTemp(Ity_I32);
20209 loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
20210 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20211 put_ITSTATE(new_itstate); // restore
20213 DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
20214 goto decode_success;
20217 case BITS7(0,1,0,1,0,1,0):
20218 case BITS7(0,1,0,1,1,1,0): {
20219 /* ------------- LDRB Rd, [Rn, Rm] ------------- */
20220 /* ------------- STRB Rd, [Rn, Rm] ------------- */
20221 /* LDRB/STRB Rd, [Rn + Rm] */
20222 UInt rD = INSN0(2,0);
20223 UInt rN = INSN0(5,3);
20224 UInt rM = INSN0(8,6);
20225 UInt isLD = INSN0(11,11);
20227 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20228 put_ITSTATE(old_itstate); // backout
20229 if (isLD) {
20230 IRTemp tD = newTemp(Ity_I32);
20231 loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
20232 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20233 } else {
20234 storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
20236 put_ITSTATE(new_itstate); // restore
20238 DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20239 goto decode_success;
20242 default:
20243 break; /* examine the next shortest prefix */
20248 /* ================ 16-bit 15:11 cases ================ */
20250 switch (INSN0(15,11)) {
20252 case BITS5(0,0,1,1,0):
20253 case BITS5(0,0,1,1,1): {
20254 /* ---------------- ADDS Rn, #uimm8 ---------------- */
20255 /* ---------------- SUBS Rn, #uimm8 ---------------- */
20256 UInt isSub = INSN0(11,11);
20257 UInt rN = INSN0(10,8);
20258 UInt uimm8 = INSN0(7,0);
20259 IRTemp argL = newTemp(Ity_I32);
20260 IRTemp argR = newTemp(Ity_I32);
20261 assign( argL, getIRegT(rN) );
20262 assign( argR, mkU32(uimm8) );
20263 putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
20264 mkexpr(argL), mkexpr(argR)), condT );
20265 setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20266 argL, argR, cond_AND_notInIT_T );
20267 DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
20268 goto decode_success;
20271 case BITS5(1,0,1,0,0): {
20272 /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
20273 /* a.k.a. ADR */
20274 /* rD = align4(PC) + imm8 * 4 */
20275 UInt rD = INSN0(10,8);
20276 UInt imm8 = INSN0(7,0);
20277 putIRegT(rD, binop(Iop_Add32,
20278 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20279 mkU32(imm8 * 4)),
20280 condT);
20281 DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
20282 goto decode_success;
20285 case BITS5(1,0,1,0,1): {
20286 /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
20287 UInt rD = INSN0(10,8);
20288 UInt imm8 = INSN0(7,0);
20289 putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
20290 condT);
20291 DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
20292 goto decode_success;
20295 case BITS5(0,0,1,0,1): {
20296 /* ---------------- CMP Rn, #uimm8 ---------------- */
20297 UInt rN = INSN0(10,8);
20298 UInt uimm8 = INSN0(7,0);
20299 IRTemp argL = newTemp(Ity_I32);
20300 IRTemp argR = newTemp(Ity_I32);
20301 assign( argL, getIRegT(rN) );
20302 assign( argR, mkU32(uimm8) );
20303 /* Update flags regardless of whether in an IT block or not. */
20304 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
20305 DIP("cmp r%u, #%u\n", rN, uimm8);
20306 goto decode_success;
20309 case BITS5(0,0,1,0,0): {
20310 /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
20311 UInt rD = INSN0(10,8);
20312 UInt uimm8 = INSN0(7,0);
20313 IRTemp oldV = newTemp(Ity_I32);
20314 IRTemp oldC = newTemp(Ity_I32);
20315 IRTemp res = newTemp(Ity_I32);
20316 assign( oldV, mk_armg_calculate_flag_v() );
20317 assign( oldC, mk_armg_calculate_flag_c() );
20318 assign( res, mkU32(uimm8) );
20319 putIRegT(rD, mkexpr(res), condT);
20320 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
20321 cond_AND_notInIT_T );
20322 DIP("movs r%u, #%u\n", rD, uimm8);
20323 goto decode_success;
20326 case BITS5(0,1,0,0,1): {
20327 /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
20328 /* LDR Rd, [align4(PC) + imm8 * 4] */
20329 UInt rD = INSN0(10,8);
20330 UInt imm8 = INSN0(7,0);
20331 IRTemp ea = newTemp(Ity_I32);
20333 assign(ea, binop(Iop_Add32,
20334 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20335 mkU32(imm8 * 4)));
20336 put_ITSTATE(old_itstate); // backout
20337 IRTemp tD = newTemp(Ity_I32);
20338 loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
20339 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20340 put_ITSTATE(new_itstate); // restore
20342 DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
20343 goto decode_success;
20346 case BITS5(0,1,1,0,0): /* STR */
20347 case BITS5(0,1,1,0,1): { /* LDR */
20348 /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
20349 /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
20350 /* LDR/STR Rd, [Rn + imm5 * 4] */
20351 UInt rD = INSN0(2,0);
20352 UInt rN = INSN0(5,3);
20353 UInt imm5 = INSN0(10,6);
20354 UInt isLD = INSN0(11,11);
20356 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
20357 put_ITSTATE(old_itstate); // backout
20358 if (isLD) {
20359 IRTemp tD = newTemp(Ity_I32);
20360 loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20361 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20362 } else {
20363 storeGuardedLE( ea, getIRegT(rD), condT );
20365 put_ITSTATE(new_itstate); // restore
20367 DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
20368 goto decode_success;
20371 case BITS5(1,0,0,0,0): /* STRH */
20372 case BITS5(1,0,0,0,1): { /* LDRH */
20373 /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
20374 /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
20375 /* LDRH/STRH Rd, [Rn + imm5 * 2] */
20376 UInt rD = INSN0(2,0);
20377 UInt rN = INSN0(5,3);
20378 UInt imm5 = INSN0(10,6);
20379 UInt isLD = INSN0(11,11);
20381 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
20382 put_ITSTATE(old_itstate); // backout
20383 if (isLD) {
20384 IRTemp tD = newTemp(Ity_I32);
20385 loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
20386 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20387 } else {
20388 storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
20390 put_ITSTATE(new_itstate); // restore
20392 DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
20393 goto decode_success;
20396 case BITS5(0,1,1,1,0): /* STRB */
20397 case BITS5(0,1,1,1,1): { /* LDRB */
20398 /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
20399 /* ------------- STRB Rd, [Rn, #imm5] ------------- */
20400 /* LDRB/STRB Rd, [Rn + imm5] */
20401 UInt rD = INSN0(2,0);
20402 UInt rN = INSN0(5,3);
20403 UInt imm5 = INSN0(10,6);
20404 UInt isLD = INSN0(11,11);
20406 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
20407 put_ITSTATE(old_itstate); // backout
20408 if (isLD) {
20409 IRTemp tD = newTemp(Ity_I32);
20410 loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
20411 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20412 } else {
20413 storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
20415 put_ITSTATE(new_itstate); // restore
20417 DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
20418 goto decode_success;
20421 case BITS5(1,0,0,1,0): /* STR */
20422 case BITS5(1,0,0,1,1): { /* LDR */
20423 /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
20424 /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
20425 /* LDR/STR Rd, [SP + imm8 * 4] */
20426 UInt rD = INSN0(10,8);
20427 UInt imm8 = INSN0(7,0);
20428 UInt isLD = INSN0(11,11);
20430 IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
20431 put_ITSTATE(old_itstate); // backout
20432 if (isLD) {
20433 IRTemp tD = newTemp(Ity_I32);
20434 loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20435 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20436 } else {
20437 storeGuardedLE(ea, getIRegT(rD), condT);
20439 put_ITSTATE(new_itstate); // restore
20441 DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
20442 goto decode_success;
20445 case BITS5(1,1,0,0,1): {
20446 /* ------------- LDMIA Rn!, {reglist} ------------- */
20447 Int i, nRegs = 0;
20448 UInt rN = INSN0(10,8);
20449 UInt list = INSN0(7,0);
20450 /* Empty lists aren't allowed. */
20451 if (list != 0) {
20452 mk_skip_over_T16_if_cond_is_false(condT);
20453 condT = IRTemp_INVALID;
20454 put_ITSTATE(old_itstate);
20455 // now uncond
20457 IRTemp oldRn = newTemp(Ity_I32);
20458 IRTemp base = newTemp(Ity_I32);
20459 assign(oldRn, getIRegT(rN));
20460 assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
20461 for (i = 0; i < 8; i++) {
20462 if (0 == (list & (1 << i)))
20463 continue;
20464 nRegs++;
20465 putIRegT(
20466 i, loadLE(Ity_I32,
20467 binop(Iop_Add32, mkexpr(base),
20468 mkU32(nRegs * 4 - 4))),
20469 IRTemp_INVALID
20472 /* Only do the writeback for rN if it isn't in the list of
20473 registers to be transferred. */
20474 if (0 == (list & (1 << rN))) {
20475 putIRegT(rN,
20476 binop(Iop_Add32, mkexpr(oldRn),
20477 mkU32(nRegs * 4)),
20478 IRTemp_INVALID
20482 /* Reinstate the ITSTATE update. */
20483 put_ITSTATE(new_itstate);
20485 DIP("ldmia r%u!, {0x%04x}\n", rN, list);
20486 goto decode_success;
20488 break;
20491 case BITS5(1,1,0,0,0): {
20492 /* ------------- STMIA Rn!, {reglist} ------------- */
20493 Int i, nRegs = 0;
20494 UInt rN = INSN0(10,8);
20495 UInt list = INSN0(7,0);
20496 /* Empty lists aren't allowed. Also, if rN is in the list then
20497 it must be the lowest numbered register in the list. */
20498 Bool valid = list != 0;
20499 if (valid && 0 != (list & (1 << rN))) {
20500 for (i = 0; i < rN; i++) {
20501 if (0 != (list & (1 << i)))
20502 valid = False;
20505 if (valid) {
20506 mk_skip_over_T16_if_cond_is_false(condT);
20507 condT = IRTemp_INVALID;
20508 put_ITSTATE(old_itstate);
20509 // now uncond
20511 IRTemp oldRn = newTemp(Ity_I32);
20512 IRTemp base = newTemp(Ity_I32);
20513 assign(oldRn, getIRegT(rN));
20514 assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
20515 for (i = 0; i < 8; i++) {
20516 if (0 == (list & (1 << i)))
20517 continue;
20518 nRegs++;
20519 storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
20520 getIRegT(i) );
20522 /* Always do the writeback. */
20523 putIRegT(rN,
20524 binop(Iop_Add32, mkexpr(oldRn),
20525 mkU32(nRegs * 4)),
20526 IRTemp_INVALID);
20528 /* Reinstate the ITSTATE update. */
20529 put_ITSTATE(new_itstate);
20531 DIP("stmia r%u!, {0x%04x}\n", rN, list);
20532 goto decode_success;
20534 break;
20537 case BITS5(0,0,0,0,0): /* LSLS */
20538 case BITS5(0,0,0,0,1): /* LSRS */
20539 case BITS5(0,0,0,1,0): { /* ASRS */
20540 /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
20541 /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
20542 /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
20543 UInt rD = INSN0(2,0);
20544 UInt rM = INSN0(5,3);
20545 UInt imm5 = INSN0(10,6);
20546 IRTemp res = newTemp(Ity_I32);
20547 IRTemp resC = newTemp(Ity_I32);
20548 IRTemp rMt = newTemp(Ity_I32);
20549 IRTemp oldV = newTemp(Ity_I32);
20550 const HChar* wot = "???";
20551 assign(rMt, getIRegT(rM));
20552 assign(oldV, mk_armg_calculate_flag_v());
20553 /* Looks like INSN0(12,11) are the standard 'how' encoding.
20554 Could compactify if the ROR case later appears. */
20555 switch (INSN0(15,11)) {
20556 case BITS5(0,0,0,0,0):
20557 compute_result_and_C_after_LSL_by_imm5(
20558 dis_buf, &res, &resC, rMt, imm5, rM
20560 wot = "lsl";
20561 break;
20562 case BITS5(0,0,0,0,1):
20563 compute_result_and_C_after_LSR_by_imm5(
20564 dis_buf, &res, &resC, rMt, imm5, rM
20566 wot = "lsr";
20567 break;
20568 case BITS5(0,0,0,1,0):
20569 compute_result_and_C_after_ASR_by_imm5(
20570 dis_buf, &res, &resC, rMt, imm5, rM
20572 wot = "asr";
20573 break;
20574 default:
20575 /*NOTREACHED*/vassert(0);
20577 // not safe to read guest state after this point
20578 putIRegT(rD, mkexpr(res), condT);
20579 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
20580 cond_AND_notInIT_T );
20581 /* ignore buf and roll our own output */
20582 DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
20583 goto decode_success;
20586 case BITS5(1,1,1,0,0): {
20587 /* ---------------- B #simm11 ---------------- */
20588 UInt uimm11 = INSN0(10,0); uimm11 <<= 21;
20589 Int simm11 = (Int)uimm11; simm11 >>= 20;
20590 UInt dst = simm11 + guest_R15_curr_instr_notENC + 4;
20591 /* Only allowed outside or last-in IT block; SIGILL if not so. */
20592 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20593 // and skip this insn if not selected; being cleverer is too
20594 // difficult
20595 mk_skip_over_T16_if_cond_is_false(condT);
20596 condT = IRTemp_INVALID;
20597 // now uncond
20598 llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
20599 dres.jk_StopHere = Ijk_Boring;
20600 dres.whatNext = Dis_StopHere;
20601 DIP("b 0x%x\n", dst);
20602 goto decode_success;
20605 default:
20606 break; /* examine the next shortest prefix */
20611 /* ================ 16-bit 15:12 cases ================ */
20613 switch (INSN0(15,12)) {
20615 case BITS4(1,1,0,1): {
20616 /* ---------------- Bcond #simm8 ---------------- */
20617 UInt cond = INSN0(11,8);
20618 UInt uimm8 = INSN0(7,0); uimm8 <<= 24;
20619 Int simm8 = (Int)uimm8; simm8 >>= 23;
20620 UInt dst = simm8 + guest_R15_curr_instr_notENC + 4;
20621 if (cond != ARMCondAL && cond != ARMCondNV) {
20622 /* Not allowed in an IT block; SIGILL if so. */
20623 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
20625 IRTemp kondT = newTemp(Ity_I32);
20626 assign( kondT, mk_armg_calculate_condition(cond) );
20627 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
20628 Ijk_Boring,
20629 IRConst_U32(dst | 1/*CPSR.T*/),
20630 OFFB_R15T ));
20631 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
20632 | 1 /*CPSR.T*/ ));
20633 dres.jk_StopHere = Ijk_Boring;
20634 dres.whatNext = Dis_StopHere;
20635 DIP("b%s 0x%x\n", nCC(cond), dst);
20636 goto decode_success;
20638 break;
20641 default:
20642 break; /* hmm, nothing matched */
20646 /* ================ 16-bit misc cases ================ */
20648 switch (INSN0(15,0)) {
20649 case 0xBF00:
20650 /* ------ NOP ------ */
20651 DIP("nop\n");
20652 goto decode_success;
20653 case 0xBF10: // YIELD
20654 case 0xBF20: // WFE
20655 /* ------ WFE, YIELD ------ */
20656 /* Both appear to get used as a spin-loop hints. Do the usual thing,
20657 which is to continue after yielding. */
20658 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
20659 Ijk_Yield,
20660 IRConst_U32((guest_R15_curr_instr_notENC + 2)
20661 | 1 /*CPSR.T*/),
20662 OFFB_R15T ));
20663 Bool isWFE = INSN0(15,0) == 0xBF20;
20664 DIP(isWFE ? "wfe\n" : "yield\n");
20665 goto decode_success;
20666 case 0xBF40:
20667 /* ------ SEV ------ */
20668 /* Treat this as a no-op. Any matching WFEs won't really
20669 cause the host CPU to snooze; they just cause V to try to
20670 run some other thread for a while. So there's no point in
20671 really doing anything for SEV. */
20672 DIP("sev\n");
20673 goto decode_success;
20674 default:
20675 break; /* fall through */
20678 /* ----------------------------------------------------------- */
20679 /* -- -- */
20680 /* -- Thumb 32-bit integer instructions -- */
20681 /* -- -- */
20682 /* ----------------------------------------------------------- */
20684 # define INSN1(_bMax,_bMin) SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
20686 /* second 16 bits of the instruction, if any */
20687 vassert(insn1 == 0);
20688 insn1 = getUShortLittleEndianly( guest_instr+2 );
20690 anOp = Iop_INVALID; /* paranoia */
20691 anOpNm = NULL; /* paranoia */
20693 /* Change result defaults to suit 32-bit insns. */
20694 vassert(dres.whatNext == Dis_Continue);
20695 vassert(dres.len == 2);
20696 dres.len = 4;
20698 /* ---------------- BL/BLX simm26 ---------------- */
20699 if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
20700 UInt isBL = INSN1(12,12);
20701 UInt bS = INSN0(10,10);
20702 UInt bJ1 = INSN1(13,13);
20703 UInt bJ2 = INSN1(11,11);
20704 UInt bI1 = 1 ^ (bJ1 ^ bS);
20705 UInt bI2 = 1 ^ (bJ2 ^ bS);
20706 UInt uimm25
20707 = (bS << (1 + 1 + 10 + 11 + 1))
20708 | (bI1 << (1 + 10 + 11 + 1))
20709 | (bI2 << (10 + 11 + 1))
20710 | (INSN0(9,0) << (11 + 1))
20711 | (INSN1(10,0) << 1);
20712 uimm25 <<= 7;
20713 Int simm25 = (Int)uimm25;
20714 simm25 >>= 7;
20716 vassert(0 == (guest_R15_curr_instr_notENC & 1));
20717 UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
20719 /* One further validity case to check: in the case of BLX
20720 (not-BL), that insn1[0] must be zero. */
20721 Bool valid = True;
20722 if (isBL == 0 && INSN1(0,0) == 1) valid = False;
20723 if (valid) {
20724 /* Only allowed outside or last-in IT block; SIGILL if not so. */
20725 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20726 // and skip this insn if not selected; being cleverer is too
20727 // difficult
20728 mk_skip_over_T32_if_cond_is_false(condT);
20729 condT = IRTemp_INVALID;
20730 // now uncond
20732 /* We're returning to Thumb code, hence "| 1" */
20733 putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
20734 IRTemp_INVALID);
20735 if (isBL) {
20736 /* BL: unconditional T -> T call */
20737 /* we're calling Thumb code, hence "| 1" */
20738 llPutIReg(15, mkU32( dst | 1 ));
20739 DIP("bl 0x%x (stay in Thumb mode)\n", dst);
20740 } else {
20741 /* BLX: unconditional T -> A call */
20742 /* we're calling ARM code, hence "& 3" to align to a
20743 valid ARM insn address */
20744 llPutIReg(15, mkU32( dst & ~3 ));
20745 DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
20747 dres.whatNext = Dis_StopHere;
20748 dres.jk_StopHere = Ijk_Call;
20749 goto decode_success;
20753 /* ---------------- {LD,ST}M{IA,DB} ---------------- */
20754 if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
20755 || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
20756 UInt bW = INSN0(5,5); /* writeback Rn ? */
20757 UInt bL = INSN0(4,4);
20758 UInt rN = INSN0(3,0);
20759 UInt bP = INSN1(15,15); /* reglist entry for r15 */
20760 UInt bM = INSN1(14,14); /* reglist entry for r14 */
20761 UInt rLmost = INSN1(12,0); /* reglist entry for r0 .. 12 */
20762 UInt rL13 = INSN1(13,13); /* must be zero */
20763 UInt regList = 0;
20764 Bool valid = True;
20766 UInt bINC = 1;
20767 UInt bBEFORE = 0;
20768 if (INSN0(15,6) == 0x3a4) {
20769 bINC = 0;
20770 bBEFORE = 1;
20773 /* detect statically invalid cases, and construct the final
20774 reglist */
20775 if (rL13 == 1)
20776 valid = False;
20778 if (bL == 1) {
20779 regList = (bP << 15) | (bM << 14) | rLmost;
20780 if (rN == 15) valid = False;
20781 if (popcount32(regList) < 2) valid = False;
20782 if (bP == 1 && bM == 1) valid = False;
20783 if (bW == 1 && (regList & (1<<rN))) valid = False;
20784 } else {
20785 regList = (bM << 14) | rLmost;
20786 if (bP == 1) valid = False;
20787 if (rN == 15) valid = False;
20788 if (popcount32(regList) < 2) valid = False;
20789 if (bW == 1 && (regList & (1<<rN))) valid = False;
20792 if (valid) {
20793 if (bL == 1 && bP == 1) {
20794 // We'll be writing the PC. Hence:
20795 /* Only allowed outside or last-in IT block; SIGILL if not so. */
20796 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20799 /* Go uncond: */
20800 mk_skip_over_T32_if_cond_is_false(condT);
20801 condT = IRTemp_INVALID;
20802 // now uncond
20804 /* Generate the IR. This might generate a write to R15. */
20805 mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
20807 if (bL == 1 && (regList & (1<<15))) {
20808 // If we wrote to R15, we have an interworking return to
20809 // deal with.
20810 llPutIReg(15, llGetIReg(15));
20811 dres.jk_StopHere = Ijk_Ret;
20812 dres.whatNext = Dis_StopHere;
20815 DIP("%sm%c%c r%u%s, {0x%04x}\n",
20816 bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
20817 rN, bW ? "!" : "", regList);
20819 goto decode_success;
20823 /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
20824 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20825 && INSN0(9,5) == BITS5(0,1,0,0,0)
20826 && INSN1(15,15) == 0) {
20827 UInt bS = INSN0(4,4);
20828 UInt rN = INSN0(3,0);
20829 UInt rD = INSN1(11,8);
20830 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20831 /* but allow "add.w reg, sp, #constT" for reg != PC */
20832 if (!valid && rD <= 14 && rN == 13)
20833 valid = True;
20834 if (valid) {
20835 IRTemp argL = newTemp(Ity_I32);
20836 IRTemp argR = newTemp(Ity_I32);
20837 IRTemp res = newTemp(Ity_I32);
20838 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20839 assign(argL, getIRegT(rN));
20840 assign(argR, mkU32(imm32));
20841 assign(res, binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
20842 putIRegT(rD, mkexpr(res), condT);
20843 if (bS == 1)
20844 setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
20845 DIP("add%s.w r%u, r%u, #%u\n",
20846 bS == 1 ? "s" : "", rD, rN, imm32);
20847 goto decode_success;
20851 /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
20852 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20853 && INSN0(9,4) == BITS6(1,0,0,0,0,0)
20854 && INSN1(15,15) == 0) {
20855 UInt rN = INSN0(3,0);
20856 UInt rD = INSN1(11,8);
20857 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20858 /* but allow "addw reg, sp, #uimm12" for reg != PC */
20859 if (!valid && rD <= 14 && rN == 13)
20860 valid = True;
20861 if (valid) {
20862 IRTemp argL = newTemp(Ity_I32);
20863 IRTemp argR = newTemp(Ity_I32);
20864 IRTemp res = newTemp(Ity_I32);
20865 UInt imm12 = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
20866 assign(argL, getIRegT(rN));
20867 assign(argR, mkU32(imm12));
20868 assign(res, binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
20869 putIRegT(rD, mkexpr(res), condT);
20870 DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
20871 goto decode_success;
20875 /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
20876 /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
20877 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20878 && ( INSN0(9,4) == BITS6(0,1,1,0,1,1) // CMP
20879 || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
20880 && INSN1(15,15) == 0
20881 && INSN1(11,8) == BITS4(1,1,1,1)) {
20882 UInt rN = INSN0(3,0);
20883 if (rN != 15) {
20884 IRTemp argL = newTemp(Ity_I32);
20885 IRTemp argR = newTemp(Ity_I32);
20886 Bool isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
20887 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20888 assign(argL, getIRegT(rN));
20889 assign(argR, mkU32(imm32));
20890 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
20891 argL, argR, condT );
20892 DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
20893 goto decode_success;
20897 /* -------------- (T1) TST.W Rn, #constT -------------- */
20898 /* -------------- (T1) TEQ.W Rn, #constT -------------- */
20899 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20900 && ( INSN0(9,4) == BITS6(0,0,0,0,0,1) // TST
20901 || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
20902 && INSN1(15,15) == 0
20903 && INSN1(11,8) == BITS4(1,1,1,1)) {
20904 UInt rN = INSN0(3,0);
20905 if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
20906 Bool isTST = INSN0(9,4) == BITS6(0,0,0,0,0,1);
20907 IRTemp argL = newTemp(Ity_I32);
20908 IRTemp argR = newTemp(Ity_I32);
20909 IRTemp res = newTemp(Ity_I32);
20910 IRTemp oldV = newTemp(Ity_I32);
20911 IRTemp oldC = newTemp(Ity_I32);
20912 Bool updC = False;
20913 UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
20914 assign(argL, getIRegT(rN));
20915 assign(argR, mkU32(imm32));
20916 assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
20917 mkexpr(argL), mkexpr(argR)));
20918 assign( oldV, mk_armg_calculate_flag_v() );
20919 assign( oldC, updC
20920 ? mkU32((imm32 >> 31) & 1)
20921 : mk_armg_calculate_flag_c() );
20922 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
20923 DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
20924 goto decode_success;
20928 /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
20929 /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
20930 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20931 && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
20932 || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
20933 && INSN1(15,15) == 0) {
20934 Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
20935 UInt bS = INSN0(4,4);
20936 UInt rN = INSN0(3,0);
20937 UInt rD = INSN1(11,8);
20938 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20939 /* but allow "sub{s}.w reg, sp, #constT
20940 this is (T2) of "SUB (SP minus immediate)" */
20941 if (!valid && !isRSB && rN == 13 && rD != 15)
20942 valid = True;
20943 if (valid) {
20944 IRTemp argL = newTemp(Ity_I32);
20945 IRTemp argR = newTemp(Ity_I32);
20946 IRTemp res = newTemp(Ity_I32);
20947 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20948 assign(argL, getIRegT(rN));
20949 assign(argR, mkU32(imm32));
20950 assign(res, isRSB
20951 ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
20952 : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
20953 putIRegT(rD, mkexpr(res), condT);
20954 if (bS == 1) {
20955 if (isRSB)
20956 setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
20957 else
20958 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
20960 DIP("%s%s.w r%u, r%u, #%u\n",
20961 isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
20962 goto decode_success;
20966 /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
20967 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20968 && INSN0(9,4) == BITS6(1,0,1,0,1,0)
20969 && INSN1(15,15) == 0) {
20970 UInt rN = INSN0(3,0);
20971 UInt rD = INSN1(11,8);
20972 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20973 /* but allow "subw sp, sp, #uimm12" */
20974 if (!valid && rD == 13 && rN == 13)
20975 valid = True;
20976 if (valid) {
20977 IRTemp argL = newTemp(Ity_I32);
20978 IRTemp argR = newTemp(Ity_I32);
20979 IRTemp res = newTemp(Ity_I32);
20980 UInt imm12 = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
20981 assign(argL, getIRegT(rN));
20982 assign(argR, mkU32(imm12));
20983 assign(res, binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
20984 putIRegT(rD, mkexpr(res), condT);
20985 DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
20986 goto decode_success;
20990 /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
20991 /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
20992 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20993 && ( INSN0(9,5) == BITS5(0,1,0,1,0) // ADC
20994 || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
20995 && INSN1(15,15) == 0) {
20996 /* ADC: Rd = Rn + constT + oldC */
20997 /* SBC: Rd = Rn - constT - (oldC ^ 1) */
20998 UInt bS = INSN0(4,4);
20999 UInt rN = INSN0(3,0);
21000 UInt rD = INSN1(11,8);
21001 if (!isBadRegT(rN) && !isBadRegT(rD)) {
21002 IRTemp argL = newTemp(Ity_I32);
21003 IRTemp argR = newTemp(Ity_I32);
21004 IRTemp res = newTemp(Ity_I32);
21005 IRTemp oldC = newTemp(Ity_I32);
21006 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
21007 assign(argL, getIRegT(rN));
21008 assign(argR, mkU32(imm32));
21009 assign(oldC, mk_armg_calculate_flag_c() );
21010 const HChar* nm = "???";
21011 switch (INSN0(9,5)) {
21012 case BITS5(0,1,0,1,0): // ADC
21013 nm = "adc";
21014 assign(res,
21015 binop(Iop_Add32,
21016 binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
21017 mkexpr(oldC) ));
21018 putIRegT(rD, mkexpr(res), condT);
21019 if (bS)
21020 setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
21021 argL, argR, oldC, condT );
21022 break;
21023 case BITS5(0,1,0,1,1): // SBC
21024 nm = "sbc";
21025 assign(res,
21026 binop(Iop_Sub32,
21027 binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
21028 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
21029 putIRegT(rD, mkexpr(res), condT);
21030 if (bS)
21031 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
21032 argL, argR, oldC, condT );
21033 break;
21034 default:
21035 vassert(0);
21037 DIP("%s%s.w r%u, r%u, #%u\n",
21038 nm, bS == 1 ? "s" : "", rD, rN, imm32);
21039 goto decode_success;
21043 /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
21044 /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
21045 /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
21046 /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
21047 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21048 && ( INSN0(9,5) == BITS5(0,0,0,1,0) // ORR
21049 || INSN0(9,5) == BITS5(0,0,0,0,0) // AND
21050 || INSN0(9,5) == BITS5(0,0,0,0,1) // BIC
21051 || INSN0(9,5) == BITS5(0,0,1,0,0) // EOR
21052 || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
21053 && INSN1(15,15) == 0) {
21054 UInt bS = INSN0(4,4);
21055 UInt rN = INSN0(3,0);
21056 UInt rD = INSN1(11,8);
21057 if (!isBadRegT(rN) && !isBadRegT(rD)) {
21058 Bool notArgR = False;
21059 IROp op = Iop_INVALID;
21060 const HChar* nm = "???";
21061 switch (INSN0(9,5)) {
21062 case BITS5(0,0,0,1,0): op = Iop_Or32; nm = "orr"; break;
21063 case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
21064 case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
21065 notArgR = True; break;
21066 case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
21067 case BITS5(0,0,0,1,1): op = Iop_Or32; nm = "orn";
21068 notArgR = True; break;
21069 default: vassert(0);
21071 IRTemp argL = newTemp(Ity_I32);
21072 IRTemp argR = newTemp(Ity_I32);
21073 IRTemp res = newTemp(Ity_I32);
21074 Bool updC = False;
21075 UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
21076 assign(argL, getIRegT(rN));
21077 assign(argR, mkU32(notArgR ? ~imm32 : imm32));
21078 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
21079 putIRegT(rD, mkexpr(res), condT);
21080 if (bS) {
21081 IRTemp oldV = newTemp(Ity_I32);
21082 IRTemp oldC = newTemp(Ity_I32);
21083 assign( oldV, mk_armg_calculate_flag_v() );
21084 assign( oldC, updC
21085 ? mkU32((imm32 >> 31) & 1)
21086 : mk_armg_calculate_flag_c() );
21087 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21088 condT );
21090 DIP("%s%s.w r%u, r%u, #%u\n",
21091 nm, bS == 1 ? "s" : "", rD, rN, imm32);
21092 goto decode_success;
21096 /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
21097 /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
21098 /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
21099 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21100 && ( INSN0(8,5) == BITS4(1,0,0,0) // add subopc
21101 || INSN0(8,5) == BITS4(1,1,0,1) // sub subopc
21102 || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
21103 && INSN1(15,15) == 0) {
21104 UInt rN = INSN0(3,0);
21105 UInt rD = INSN1(11,8);
21106 UInt rM = INSN1(3,0);
21107 UInt bS = INSN0(4,4);
21108 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21109 UInt how = INSN1(5,4);
21111 Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
21112 /* but allow "add.w reg, sp, reg, lsl #N for N=0..31
21113 (T3) "ADD (SP plus register) */
21114 if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
21115 && rD != 15 && rN == 13 && imm5 <= 31 && how == 0) {
21116 valid = True;
21118 /* also allow "sub.w reg, sp, reg lsl #N for N=0 .. 5
21119 (T1) "SUB (SP minus register) */
21120 if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
21121 && rD != 15 && rN == 13 && imm5 <= 5 && how == 0) {
21122 valid = True;
21124 if (valid) {
21125 Bool swap = False;
21126 IROp op = Iop_INVALID;
21127 const HChar* nm = "???";
21128 switch (INSN0(8,5)) {
21129 case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
21130 case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
21131 case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
21132 swap = True; break;
21133 default: vassert(0);
21136 IRTemp argL = newTemp(Ity_I32);
21137 assign(argL, getIRegT(rN));
21139 IRTemp rMt = newTemp(Ity_I32);
21140 assign(rMt, getIRegT(rM));
21142 IRTemp argR = newTemp(Ity_I32);
21143 compute_result_and_C_after_shift_by_imm5(
21144 dis_buf, &argR, NULL, rMt, how, imm5, rM
21147 IRTemp res = newTemp(Ity_I32);
21148 assign(res, swap
21149 ? binop(op, mkexpr(argR), mkexpr(argL))
21150 : binop(op, mkexpr(argL), mkexpr(argR)));
21152 putIRegT(rD, mkexpr(res), condT);
21153 if (bS) {
21154 switch (op) {
21155 case Iop_Add32:
21156 setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
21157 break;
21158 case Iop_Sub32:
21159 if (swap)
21160 setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
21161 else
21162 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
21163 break;
21164 default:
21165 vassert(0);
21169 DIP("%s%s.w r%u, r%u, %s\n",
21170 nm, bS ? "s" : "", rD, rN, dis_buf);
21171 goto decode_success;
21175 /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
21176 /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
21177 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21178 && ( INSN0(8,5) == BITS4(1,0,1,0) // adc subopc
21179 || INSN0(8,5) == BITS4(1,0,1,1)) // sbc subopc
21180 && INSN1(15,15) == 0) {
21181 /* ADC: Rd = Rn + shifter_operand + oldC */
21182 /* SBC: Rd = Rn - shifter_operand - (oldC ^ 1) */
21183 UInt rN = INSN0(3,0);
21184 UInt rD = INSN1(11,8);
21185 UInt rM = INSN1(3,0);
21186 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21187 UInt bS = INSN0(4,4);
21188 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21189 UInt how = INSN1(5,4);
21191 IRTemp argL = newTemp(Ity_I32);
21192 assign(argL, getIRegT(rN));
21194 IRTemp rMt = newTemp(Ity_I32);
21195 assign(rMt, getIRegT(rM));
21197 IRTemp oldC = newTemp(Ity_I32);
21198 assign(oldC, mk_armg_calculate_flag_c());
21200 IRTemp argR = newTemp(Ity_I32);
21201 compute_result_and_C_after_shift_by_imm5(
21202 dis_buf, &argR, NULL, rMt, how, imm5, rM
21205 const HChar* nm = "???";
21206 IRTemp res = newTemp(Ity_I32);
21207 switch (INSN0(8,5)) {
21208 case BITS4(1,0,1,0): // ADC
21209 nm = "adc";
21210 assign(res,
21211 binop(Iop_Add32,
21212 binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
21213 mkexpr(oldC) ));
21214 putIRegT(rD, mkexpr(res), condT);
21215 if (bS)
21216 setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
21217 argL, argR, oldC, condT );
21218 break;
21219 case BITS4(1,0,1,1): // SBC
21220 nm = "sbc";
21221 assign(res,
21222 binop(Iop_Sub32,
21223 binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
21224 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
21225 putIRegT(rD, mkexpr(res), condT);
21226 if (bS)
21227 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
21228 argL, argR, oldC, condT );
21229 break;
21230 default:
21231 vassert(0);
21234 DIP("%s%s.w r%u, r%u, %s\n",
21235 nm, bS ? "s" : "", rD, rN, dis_buf);
21236 goto decode_success;
21240 /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
21241 /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
21242 /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
21243 /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
21244 /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
21245 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21246 && ( INSN0(8,5) == BITS4(0,0,0,0) // and subopc
21247 || INSN0(8,5) == BITS4(0,0,1,0) // orr subopc
21248 || INSN0(8,5) == BITS4(0,1,0,0) // eor subopc
21249 || INSN0(8,5) == BITS4(0,0,0,1) // bic subopc
21250 || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
21251 && INSN1(15,15) == 0) {
21252 UInt rN = INSN0(3,0);
21253 UInt rD = INSN1(11,8);
21254 UInt rM = INSN1(3,0);
21255 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21256 Bool notArgR = False;
21257 IROp op = Iop_INVALID;
21258 const HChar* nm = "???";
21259 switch (INSN0(8,5)) {
21260 case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
21261 case BITS4(0,0,1,0): op = Iop_Or32; nm = "orr"; break;
21262 case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
21263 case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
21264 notArgR = True; break;
21265 case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
21266 notArgR = True; break;
21267 default: vassert(0);
21269 UInt bS = INSN0(4,4);
21270 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21271 UInt how = INSN1(5,4);
21273 IRTemp rNt = newTemp(Ity_I32);
21274 assign(rNt, getIRegT(rN));
21276 IRTemp rMt = newTemp(Ity_I32);
21277 assign(rMt, getIRegT(rM));
21279 IRTemp argR = newTemp(Ity_I32);
21280 IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21282 compute_result_and_C_after_shift_by_imm5(
21283 dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
21286 IRTemp res = newTemp(Ity_I32);
21287 if (notArgR) {
21288 vassert(op == Iop_And32 || op == Iop_Or32);
21289 assign(res, binop(op, mkexpr(rNt),
21290 unop(Iop_Not32, mkexpr(argR))));
21291 } else {
21292 assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
21295 putIRegT(rD, mkexpr(res), condT);
21296 if (bS) {
21297 IRTemp oldV = newTemp(Ity_I32);
21298 assign( oldV, mk_armg_calculate_flag_v() );
21299 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21300 condT );
21303 DIP("%s%s.w r%u, r%u, %s\n",
21304 nm, bS ? "s" : "", rD, rN, dis_buf);
21305 goto decode_success;
21309 /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
21310 /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
21311 /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
21312 /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
21313 if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
21314 && INSN1(15,12) == BITS4(1,1,1,1)
21315 && INSN1(7,4) == BITS4(0,0,0,0)) {
21316 UInt how = INSN0(6,5); // standard encoding
21317 UInt rN = INSN0(3,0);
21318 UInt rD = INSN1(11,8);
21319 UInt rM = INSN1(3,0);
21320 UInt bS = INSN0(4,4);
21321 Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
21322 if (valid) {
21323 IRTemp rNt = newTemp(Ity_I32);
21324 IRTemp rMt = newTemp(Ity_I32);
21325 IRTemp res = newTemp(Ity_I32);
21326 IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21327 IRTemp oldV = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21328 const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
21329 const HChar* nm = nms[how];
21330 assign(rNt, getIRegT(rN));
21331 assign(rMt, getIRegT(rM));
21332 compute_result_and_C_after_shift_by_reg(
21333 dis_buf, &res, bS ? &oldC : NULL,
21334 rNt, how, rMt, rN, rM
21336 if (bS)
21337 assign(oldV, mk_armg_calculate_flag_v());
21338 putIRegT(rD, mkexpr(res), condT);
21339 if (bS) {
21340 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21341 condT );
21343 DIP("%s%s.w r%u, r%u, r%u\n",
21344 nm, bS ? "s" : "", rD, rN, rM);
21345 goto decode_success;
21349 /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
21350 /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
21351 if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
21352 && INSN1(15,15) == 0) {
21353 UInt rD = INSN1(11,8);
21354 UInt rN = INSN1(3,0);
21355 UInt bS = INSN0(4,4);
21356 UInt isMVN = INSN0(5,5);
21357 Bool regsOK = (bS || isMVN)
21358 ? (!isBadRegT(rD) && !isBadRegT(rN))
21359 : (rD != 15 && rN != 15 && (rD != 13 || rN != 13));
21360 if (regsOK) {
21361 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21362 UInt how = INSN1(5,4);
21364 IRTemp rNt = newTemp(Ity_I32);
21365 assign(rNt, getIRegT(rN));
21367 IRTemp oldRn = newTemp(Ity_I32);
21368 IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21369 compute_result_and_C_after_shift_by_imm5(
21370 dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
21373 IRTemp res = newTemp(Ity_I32);
21374 assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
21375 : mkexpr(oldRn));
21377 putIRegT(rD, mkexpr(res), condT);
21378 if (bS) {
21379 IRTemp oldV = newTemp(Ity_I32);
21380 assign( oldV, mk_armg_calculate_flag_v() );
21381 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
21383 DIP("%s%s.w r%u, %s\n",
21384 isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
21385 goto decode_success;
21389 /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
21390 /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
21391 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21392 && ( INSN0(8,4) == BITS5(0,0,0,0,1) // TST
21393 || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
21394 && INSN1(15,15) == 0
21395 && INSN1(11,8) == BITS4(1,1,1,1)) {
21396 UInt rN = INSN0(3,0);
21397 UInt rM = INSN1(3,0);
21398 if (!isBadRegT(rN) && !isBadRegT(rM)) {
21399 Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
21401 UInt how = INSN1(5,4);
21402 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21404 IRTemp argL = newTemp(Ity_I32);
21405 assign(argL, getIRegT(rN));
21407 IRTemp rMt = newTemp(Ity_I32);
21408 assign(rMt, getIRegT(rM));
21410 IRTemp argR = newTemp(Ity_I32);
21411 IRTemp oldC = newTemp(Ity_I32);
21412 compute_result_and_C_after_shift_by_imm5(
21413 dis_buf, &argR, &oldC, rMt, how, imm5, rM
21416 IRTemp oldV = newTemp(Ity_I32);
21417 assign( oldV, mk_armg_calculate_flag_v() );
21419 IRTemp res = newTemp(Ity_I32);
21420 assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
21421 mkexpr(argL), mkexpr(argR)));
21423 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21424 condT );
21425 DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
21426 goto decode_success;
21430 /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
21431 /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
21432 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21433 && ( INSN0(8,4) == BITS5(1,1,0,1,1) // CMP
21434 || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
21435 && INSN1(15,15) == 0
21436 && INSN1(11,8) == BITS4(1,1,1,1)) {
21437 UInt rN = INSN0(3,0);
21438 UInt rM = INSN1(3,0);
21439 if (!isBadRegT(rN) && !isBadRegT(rM)) {
21440 Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
21441 UInt how = INSN1(5,4);
21442 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21444 IRTemp argL = newTemp(Ity_I32);
21445 assign(argL, getIRegT(rN));
21447 IRTemp rMt = newTemp(Ity_I32);
21448 assign(rMt, getIRegT(rM));
21450 IRTemp argR = newTemp(Ity_I32);
21451 compute_result_and_C_after_shift_by_imm5(
21452 dis_buf, &argR, NULL, rMt, how, imm5, rM
21455 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
21456 argL, argR, condT );
21458 DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
21459 goto decode_success;
21463 /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
21464 /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
21465 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21466 && ( INSN0(9,5) == BITS5(0,0,0,1,0) // MOV
21467 || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
21468 && INSN0(3,0) == BITS4(1,1,1,1)
21469 && INSN1(15,15) == 0) {
21470 UInt rD = INSN1(11,8);
21471 if (!isBadRegT(rD)) {
21472 Bool updC = False;
21473 UInt bS = INSN0(4,4);
21474 Bool isMVN = INSN0(5,5) == 1;
21475 UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
21476 IRTemp res = newTemp(Ity_I32);
21477 assign(res, mkU32(isMVN ? ~imm32 : imm32));
21478 putIRegT(rD, mkexpr(res), condT);
21479 if (bS) {
21480 IRTemp oldV = newTemp(Ity_I32);
21481 IRTemp oldC = newTemp(Ity_I32);
21482 assign( oldV, mk_armg_calculate_flag_v() );
21483 assign( oldC, updC
21484 ? mkU32((imm32 >> 31) & 1)
21485 : mk_armg_calculate_flag_c() );
21486 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21487 condT );
21489 DIP("%s%s.w r%u, #%u\n",
21490 isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
21491 goto decode_success;
21495 /* -------------- (T3) MOVW Rd, #imm16 -------------- */
21496 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21497 && INSN0(9,4) == BITS6(1,0,0,1,0,0)
21498 && INSN1(15,15) == 0) {
21499 UInt rD = INSN1(11,8);
21500 if (!isBadRegT(rD)) {
21501 UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
21502 | (INSN1(14,12) << 8) | INSN1(7,0);
21503 putIRegT(rD, mkU32(imm16), condT);
21504 DIP("movw r%u, #%u\n", rD, imm16);
21505 goto decode_success;
21509 /* ---------------- MOVT Rd, #imm16 ---------------- */
21510 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21511 && INSN0(9,4) == BITS6(1,0,1,1,0,0)
21512 && INSN1(15,15) == 0) {
21513 UInt rD = INSN1(11,8);
21514 if (!isBadRegT(rD)) {
21515 UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
21516 | (INSN1(14,12) << 8) | INSN1(7,0);
21517 IRTemp res = newTemp(Ity_I32);
21518 assign(res,
21519 binop(Iop_Or32,
21520 binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
21521 mkU32(imm16 << 16)));
21522 putIRegT(rD, mkexpr(res), condT);
21523 DIP("movt r%u, #%u\n", rD, imm16);
21524 goto decode_success;
21528 /* ---------------- LD/ST reg+/-#imm8 ---------------- */
21529 /* Loads and stores of the form:
21530 op Rt, [Rn, #-imm8] or
21531 op Rt, [Rn], #+/-imm8 or
21532 op Rt, [Rn, #+/-imm8]!
21533 where op is one of
21534 ldrb ldrh ldr ldrsb ldrsh
21535 strb strh str
21537 if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
21538 Bool valid = True;
21539 Bool syned = False;
21540 Bool isST = False;
21541 IRType ty = Ity_I8;
21542 const HChar* nm = "???";
21544 switch (INSN0(8,4)) {
21545 case BITS5(0,0,0,0,0): // strb
21546 nm = "strb"; isST = True; break;
21547 case BITS5(0,0,0,0,1): // ldrb
21548 nm = "ldrb"; break;
21549 case BITS5(1,0,0,0,1): // ldrsb
21550 nm = "ldrsb"; syned = True; break;
21551 case BITS5(0,0,0,1,0): // strh
21552 nm = "strh"; ty = Ity_I16; isST = True; break;
21553 case BITS5(0,0,0,1,1): // ldrh
21554 nm = "ldrh"; ty = Ity_I16; break;
21555 case BITS5(1,0,0,1,1): // ldrsh
21556 nm = "ldrsh"; ty = Ity_I16; syned = True; break;
21557 case BITS5(0,0,1,0,0): // str
21558 nm = "str"; ty = Ity_I32; isST = True; break;
21559 case BITS5(0,0,1,0,1):
21560 nm = "ldr"; ty = Ity_I32; break; // ldr
21561 default:
21562 valid = False; break;
21565 UInt rN = INSN0(3,0);
21566 UInt rT = INSN1(15,12);
21567 UInt bP = INSN1(10,10);
21568 UInt bU = INSN1(9,9);
21569 UInt bW = INSN1(8,8);
21570 UInt imm8 = INSN1(7,0);
21571 Bool loadsPC = False;
21573 if (valid) {
21574 if (bP == 1 && bU == 1 && bW == 0)
21575 valid = False;
21576 if (bP == 0 && bW == 0)
21577 valid = False;
21578 if (rN == 15)
21579 valid = False;
21580 if (bW == 1 && rN == rT)
21581 valid = False;
21582 if (ty == Ity_I8 || ty == Ity_I16) {
21583 if (isBadRegT(rT))
21584 valid = False;
21585 } else {
21586 /* ty == Ity_I32 */
21587 if (isST && rT == 15)
21588 valid = False;
21589 if (!isST && rT == 15)
21590 loadsPC = True;
21594 if (valid) {
21595 // if it's a branch, it can't happen in the middle of an IT block
21596 // Also, if it is a branch, make it unconditional at this point.
21597 // Doing conditional branches in-line is too complex (for now)
21598 if (loadsPC) {
21599 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21600 // go uncond
21601 mk_skip_over_T32_if_cond_is_false(condT);
21602 condT = IRTemp_INVALID;
21603 // now uncond
21606 IRTemp preAddr = newTemp(Ity_I32);
21607 assign(preAddr, getIRegT(rN));
21609 IRTemp postAddr = newTemp(Ity_I32);
21610 assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
21611 mkexpr(preAddr), mkU32(imm8)));
21613 IRTemp transAddr = bP == 1 ? postAddr : preAddr;
21615 if (isST) {
21617 /* Store. If necessary, update the base register before
21618 the store itself, so that the common idiom of "str rX,
21619 [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
21620 a.k.a "push rX") doesn't cause Memcheck to complain
21621 that the access is below the stack pointer. Also, not
21622 updating sp before the store confuses Valgrind's
21623 dynamic stack-extending logic. So do it before the
21624 store. Hence we need to snarf the store data before
21625 doing the basereg update. */
21627 /* get hold of the data to be stored */
21628 IRTemp oldRt = newTemp(Ity_I32);
21629 assign(oldRt, getIRegT(rT));
21631 /* Update Rn if necessary. */
21632 if (bW == 1) {
21633 vassert(rN != rT); // assured by validity check above
21634 putIRegT(rN, mkexpr(postAddr), condT);
21637 /* generate the transfer */
21638 IRExpr* data = NULL;
21639 switch (ty) {
21640 case Ity_I8:
21641 data = unop(Iop_32to8, mkexpr(oldRt));
21642 break;
21643 case Ity_I16:
21644 data = unop(Iop_32to16, mkexpr(oldRt));
21645 break;
21646 case Ity_I32:
21647 data = mkexpr(oldRt);
21648 break;
21649 default:
21650 vassert(0);
21652 storeGuardedLE(mkexpr(transAddr), data, condT);
21654 } else {
21656 /* Load. */
21657 IRTemp llOldRt = newTemp(Ity_I32);
21658 assign(llOldRt, llGetIReg(rT));
21660 /* generate the transfer */
21661 IRTemp newRt = newTemp(Ity_I32);
21662 IRLoadGOp widen = ILGop_INVALID;
21663 switch (ty) {
21664 case Ity_I8:
21665 widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21666 case Ity_I16:
21667 widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21668 case Ity_I32:
21669 widen = ILGop_Ident32; break;
21670 default:
21671 vassert(0);
21673 loadGuardedLE(newRt, widen,
21674 mkexpr(transAddr), mkexpr(llOldRt), condT);
21675 if (rT == 15) {
21676 vassert(loadsPC);
21677 /* We'll do the write to the PC just below */
21678 } else {
21679 vassert(!loadsPC);
21680 /* IRTemp_INVALID is OK here because in the case where
21681 condT is false at run time, we're just putting the
21682 old rT value back. */
21683 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21686 /* Update Rn if necessary. */
21687 if (bW == 1) {
21688 vassert(rN != rT); // assured by validity check above
21689 putIRegT(rN, mkexpr(postAddr), condT);
21692 if (loadsPC) {
21693 /* Presumably this is an interworking branch. */
21694 vassert(rN != 15); // assured by validity check above
21695 vassert(rT == 15);
21696 vassert(condT == IRTemp_INVALID); /* due to check above */
21697 llPutIReg(15, mkexpr(newRt));
21698 dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */
21699 dres.whatNext = Dis_StopHere;
21703 if (bP == 1 && bW == 0) {
21704 DIP("%s.w r%u, [r%u, #%c%u]\n",
21705 nm, rT, rN, bU ? '+' : '-', imm8);
21707 else if (bP == 1 && bW == 1) {
21708 DIP("%s.w r%u, [r%u, #%c%u]!\n",
21709 nm, rT, rN, bU ? '+' : '-', imm8);
21711 else {
21712 vassert(bP == 0 && bW == 1);
21713 DIP("%s.w r%u, [r%u], #%c%u\n",
21714 nm, rT, rN, bU ? '+' : '-', imm8);
21717 goto decode_success;
21721 /* ------------- LD/ST reg+(reg<<imm2) ------------- */
21722 /* Loads and stores of the form:
21723 op Rt, [Rn, Rm, LSL #imm8]
21724 where op is one of
21725 ldrb ldrh ldr ldrsb ldrsh
21726 strb strh str
21728 if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
21729 && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
21730 Bool valid = True;
21731 Bool syned = False;
21732 Bool isST = False;
21733 IRType ty = Ity_I8;
21734 const HChar* nm = "???";
21736 switch (INSN0(8,4)) {
21737 case BITS5(0,0,0,0,0): // strb
21738 nm = "strb"; isST = True; break;
21739 case BITS5(0,0,0,0,1): // ldrb
21740 nm = "ldrb"; break;
21741 case BITS5(1,0,0,0,1): // ldrsb
21742 nm = "ldrsb"; syned = True; break;
21743 case BITS5(0,0,0,1,0): // strh
21744 nm = "strh"; ty = Ity_I16; isST = True; break;
21745 case BITS5(0,0,0,1,1): // ldrh
21746 nm = "ldrh"; ty = Ity_I16; break;
21747 case BITS5(1,0,0,1,1): // ldrsh
21748 nm = "ldrsh"; ty = Ity_I16; syned = True; break;
21749 case BITS5(0,0,1,0,0): // str
21750 nm = "str"; ty = Ity_I32; isST = True; break;
21751 case BITS5(0,0,1,0,1):
21752 nm = "ldr"; ty = Ity_I32; break; // ldr
21753 default:
21754 valid = False; break;
21757 UInt rN = INSN0(3,0);
21758 UInt rM = INSN1(3,0);
21759 UInt rT = INSN1(15,12);
21760 UInt imm2 = INSN1(5,4);
21761 Bool loadsPC = False;
21763 if (ty == Ity_I8 || ty == Ity_I16) {
21764 /* all 8- and 16-bit load and store cases have the
21765 same exclusion set. */
21766 if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
21767 valid = False;
21768 } else {
21769 vassert(ty == Ity_I32);
21770 if (rN == 15 || isBadRegT(rM))
21771 valid = False;
21772 if (isST && rT == 15)
21773 valid = False;
21774 /* If it is a load and rT is 15, that's only allowable if we
21775 not in an IT block, or are the last in it. Need to insert
21776 a dynamic check for that. */
21777 if (!isST && rT == 15)
21778 loadsPC = True;
21781 if (valid) {
21782 // if it's a branch, it can't happen in the middle of an IT block
21783 // Also, if it is a branch, make it unconditional at this point.
21784 // Doing conditional branches in-line is too complex (for now)
21785 if (loadsPC) {
21786 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21787 // go uncond
21788 mk_skip_over_T32_if_cond_is_false(condT);
21789 condT = IRTemp_INVALID;
21790 // now uncond
21793 IRTemp transAddr = newTemp(Ity_I32);
21794 assign(transAddr,
21795 binop( Iop_Add32,
21796 getIRegT(rN),
21797 binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
21799 if (isST) {
21801 /* get hold of the data to be stored */
21802 IRTemp oldRt = newTemp(Ity_I32);
21803 assign(oldRt, getIRegT(rT));
21805 /* generate the transfer */
21806 IRExpr* data = NULL;
21807 switch (ty) {
21808 case Ity_I8:
21809 data = unop(Iop_32to8, mkexpr(oldRt));
21810 break;
21811 case Ity_I16:
21812 data = unop(Iop_32to16, mkexpr(oldRt));
21813 break;
21814 case Ity_I32:
21815 data = mkexpr(oldRt);
21816 break;
21817 default:
21818 vassert(0);
21820 storeGuardedLE(mkexpr(transAddr), data, condT);
21822 } else {
21824 /* Load. */
21825 IRTemp llOldRt = newTemp(Ity_I32);
21826 assign(llOldRt, llGetIReg(rT));
21828 /* generate the transfer */
21829 IRTemp newRt = newTemp(Ity_I32);
21830 IRLoadGOp widen = ILGop_INVALID;
21831 switch (ty) {
21832 case Ity_I8:
21833 widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21834 case Ity_I16:
21835 widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21836 case Ity_I32:
21837 widen = ILGop_Ident32; break;
21838 default:
21839 vassert(0);
21841 loadGuardedLE(newRt, widen,
21842 mkexpr(transAddr), mkexpr(llOldRt), condT);
21844 if (rT == 15) {
21845 vassert(loadsPC);
21846 /* We'll do the write to the PC just below */
21847 } else {
21848 vassert(!loadsPC);
21849 /* IRTemp_INVALID is OK here because in the case where
21850 condT is false at run time, we're just putting the
21851 old rT value back. */
21852 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21855 if (loadsPC) {
21856 /* Presumably this is an interworking branch. */
21857 vassert(rN != 15); // assured by validity check above
21858 vassert(rT == 15);
21859 vassert(condT == IRTemp_INVALID); /* due to check above */
21860 llPutIReg(15, mkexpr(newRt));
21861 dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */
21862 dres.whatNext = Dis_StopHere;
21866 DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
21867 nm, rT, rN, rM, imm2);
21869 goto decode_success;
21873 /* --------------- LD/ST reg+imm12 --------------- */
21874 /* Loads and stores of the form:
21875 op Rt, [Rn, #+-imm12]
21876 where op is one of
21877 ldrb ldrh ldr ldrsb ldrsh
21878 strb strh str
21880 if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
21881 Bool valid = True;
21882 Bool syned = INSN0(8,8) == 1;
21883 Bool isST = False;
21884 IRType ty = Ity_I8;
21885 UInt bU = INSN0(7,7); // 1: +imm 0: -imm
21886 // -imm is only supported by literal versions
21887 const HChar* nm = "???";
21889 switch (INSN0(6,4)) {
21890 case BITS3(0,0,0): // strb
21891 nm = "strb"; isST = True; break;
21892 case BITS3(0,0,1): // ldrb
21893 nm = syned ? "ldrsb" : "ldrb"; break;
21894 case BITS3(0,1,0): // strh
21895 nm = "strh"; ty = Ity_I16; isST = True; break;
21896 case BITS3(0,1,1): // ldrh
21897 nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
21898 case BITS3(1,0,0): // str
21899 nm = "str"; ty = Ity_I32; isST = True; break;
21900 case BITS3(1,0,1):
21901 nm = "ldr"; ty = Ity_I32; break; // ldr
21902 default:
21903 valid = False; break;
21906 UInt rN = INSN0(3,0);
21907 UInt rT = INSN1(15,12);
21908 UInt imm12 = INSN1(11,0);
21909 Bool loadsPC = False;
21911 if (rN != 15 && bU == 0) {
21912 // only pc supports #-imm12
21913 valid = False;
21916 if (isST) {
21917 if (syned) valid = False;
21918 if (rN == 15 || rT == 15)
21919 valid = False;
21920 } else {
21921 /* For a 32-bit load, rT == 15 is only allowable if we are not
21922 in an IT block, or are the last in it. Need to insert
21923 a dynamic check for that. Also, in this particular
21924 case, rN == 15 is allowable. In this case however, the
21925 value obtained for rN is (apparently)
21926 "word-align(address of current insn + 4)". */
21927 if (rT == 15) {
21928 if (ty == Ity_I32)
21929 loadsPC = True;
21930 else // Can't do it for B/H loads
21931 valid = False;
21935 if (valid) {
21936 // if it's a branch, it can't happen in the middle of an IT block
21937 // Also, if it is a branch, make it unconditional at this point.
21938 // Doing conditional branches in-line is too complex (for now)
21939 if (loadsPC) {
21940 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21941 // go uncond
21942 mk_skip_over_T32_if_cond_is_false(condT);
21943 condT = IRTemp_INVALID;
21944 // now uncond
21947 IRTemp rNt = newTemp(Ity_I32);
21948 if (rN == 15) {
21949 vassert(!isST);
21950 assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3)));
21951 } else {
21952 assign(rNt, getIRegT(rN));
21955 IRTemp transAddr = newTemp(Ity_I32);
21956 assign(transAddr,
21957 binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
21958 mkexpr(rNt), mkU32(imm12)));
21960 IRTemp oldRt = newTemp(Ity_I32);
21961 assign(oldRt, getIRegT(rT));
21963 IRTemp llOldRt = newTemp(Ity_I32);
21964 assign(llOldRt, llGetIReg(rT));
21966 if (isST) {
21967 IRExpr* data = NULL;
21968 switch (ty) {
21969 case Ity_I8:
21970 data = unop(Iop_32to8, mkexpr(oldRt));
21971 break;
21972 case Ity_I16:
21973 data = unop(Iop_32to16, mkexpr(oldRt));
21974 break;
21975 case Ity_I32:
21976 data = mkexpr(oldRt);
21977 break;
21978 default:
21979 vassert(0);
21981 storeGuardedLE(mkexpr(transAddr), data, condT);
21982 } else {
21983 IRTemp newRt = newTemp(Ity_I32);
21984 IRLoadGOp widen = ILGop_INVALID;
21985 switch (ty) {
21986 case Ity_I8:
21987 widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21988 case Ity_I16:
21989 widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21990 case Ity_I32:
21991 widen = ILGop_Ident32; break;
21992 default:
21993 vassert(0);
21995 loadGuardedLE(newRt, widen,
21996 mkexpr(transAddr), mkexpr(llOldRt), condT);
21997 if (rT == 15) {
21998 vassert(loadsPC);
21999 /* We'll do the write to the PC just below */
22000 } else {
22001 vassert(!loadsPC);
22002 /* IRTemp_INVALID is OK here because in the case where
22003 condT is false at run time, we're just putting the
22004 old rT value back. */
22005 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
22008 if (loadsPC) {
22009 /* Presumably this is an interworking branch. */
22010 vassert(rT == 15);
22011 vassert(condT == IRTemp_INVALID); /* due to check above */
22012 llPutIReg(15, mkexpr(newRt));
22013 dres.jk_StopHere = Ijk_Boring;
22014 dres.whatNext = Dis_StopHere;
22018 DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
22020 goto decode_success;
22024 /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
22025 /* Doubleword loads and stores of the form:
22026 ldrd/strd Rt, Rt2, [Rn, #+/-imm8] or
22027 ldrd/strd Rt, Rt2, [Rn], #+/-imm8 or
22028 ldrd/strd Rt, Rt2, [Rn, #+/-imm8]!
22030 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
22031 UInt bP = INSN0(8,8);
22032 UInt bU = INSN0(7,7);
22033 UInt bW = INSN0(5,5);
22034 UInt bL = INSN0(4,4); // 1: load 0: store
22035 UInt rN = INSN0(3,0);
22036 UInt rT = INSN1(15,12);
22037 UInt rT2 = INSN1(11,8);
22038 UInt imm8 = INSN1(7,0);
22040 Bool valid = True;
22041 if (bP == 0 && bW == 0) valid = False;
22042 if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
22043 if (isBadRegT(rT) || isBadRegT(rT2)) valid = False;
22044 if (bL == 1 && rT == rT2) valid = False;
22045 /* It's OK to use PC as the base register only in the
22046 following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
22047 if (rN == 15 && (bL == 0/*store*/
22048 || bW == 1/*wb*/)) valid = False;
22050 if (valid) {
22051 IRTemp preAddr = newTemp(Ity_I32);
22052 assign(preAddr, 15 == rN
22053 ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
22054 : getIRegT(rN));
22056 IRTemp postAddr = newTemp(Ity_I32);
22057 assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
22058 mkexpr(preAddr), mkU32(imm8 << 2)));
22060 IRTemp transAddr = bP == 1 ? postAddr : preAddr;
22062 /* For almost all cases, we do the writeback after the transfers.
22063 However, that leaves the stack "uncovered" in cases like:
22064 strd rD, [sp, #-8]
22065 strd rD, [sp, #-16]
22066 In which case, do the writeback to SP now, instead of later.
22067 This is bad in that it makes the insn non-restartable if the
22068 accesses fault, but at least keeps Memcheck happy. */
22069 Bool writeback_already_done = False;
22070 if (bL == 0/*store*/ && bW == 1/*wb*/
22071 && rN == 13 && rN != rT && rN != rT2
22072 && bU == 0/*minus*/
22073 && ((imm8 << 2) == 8 || (imm8 << 2) == 16)) {
22074 putIRegT(rN, mkexpr(postAddr), condT);
22075 writeback_already_done = True;
22078 if (bL == 0) {
22079 IRTemp oldRt = newTemp(Ity_I32);
22080 IRTemp oldRt2 = newTemp(Ity_I32);
22081 assign(oldRt, getIRegT(rT));
22082 assign(oldRt2, getIRegT(rT2));
22083 storeGuardedLE( mkexpr(transAddr),
22084 mkexpr(oldRt), condT );
22085 storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
22086 mkexpr(oldRt2), condT );
22087 } else {
22088 IRTemp oldRt = newTemp(Ity_I32);
22089 IRTemp oldRt2 = newTemp(Ity_I32);
22090 IRTemp newRt = newTemp(Ity_I32);
22091 IRTemp newRt2 = newTemp(Ity_I32);
22092 assign(oldRt, llGetIReg(rT));
22093 assign(oldRt2, llGetIReg(rT2));
22094 loadGuardedLE( newRt, ILGop_Ident32,
22095 mkexpr(transAddr),
22096 mkexpr(oldRt), condT );
22097 loadGuardedLE( newRt2, ILGop_Ident32,
22098 binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
22099 mkexpr(oldRt2), condT );
22100 /* Put unconditionally, since we already switched on the condT
22101 in the guarded loads. */
22102 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
22103 putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
22106 if (bW == 1 && !writeback_already_done) {
22107 putIRegT(rN, mkexpr(postAddr), condT);
22110 const HChar* nm = bL ? "ldrd" : "strd";
22112 if (bP == 1 && bW == 0) {
22113 DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
22114 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22116 else if (bP == 1 && bW == 1) {
22117 DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
22118 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22120 else {
22121 vassert(bP == 0 && bW == 1);
22122 DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
22123 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22126 goto decode_success;
22130 /* -------------- (T3) Bcond.W label -------------- */
22131 /* This variant carries its own condition, so can't be part of an
22132 IT block ... */
22133 if (INSN0(15,11) == BITS5(1,1,1,1,0)
22134 && INSN1(15,14) == BITS2(1,0)
22135 && INSN1(12,12) == 0) {
22136 UInt cond = INSN0(9,6);
22137 if (cond != ARMCondAL && cond != ARMCondNV) {
/* Reassemble the branch offset from its scattered fields
   (S : J2 : J1 : imm6 : imm11 : 0) into a 21-bit value. */
22138 UInt uimm21
22139 = (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
22140 | (INSN1(11,11) << (1 + 6 + 11 + 1))
22141 | (INSN1(13,13) << (6 + 11 + 1))
22142 | (INSN0(5,0) << (11 + 1))
22143 | (INSN1(10,0) << 1);
/* Sign-extend from bit 20: left-justify in the 32-bit word, then
   arithmetic-shift back down.  NOTE(review): this relies on '>>' of
   a negative Int being an arithmetic shift, which is
   implementation-defined in C (true for gcc/clang). */
22144 uimm21 <<= 11;
22145 Int simm21 = (Int)uimm21;
22146 simm21 >>= 11;
/* Branch target is relative to the current insn address + 4. */
22148 vassert(0 == (guest_R15_curr_instr_notENC & 1));
22149 UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
22151 /* Not allowed in an IT block; SIGILL if so. */
22152 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
22154 IRTemp kondT = newTemp(Ity_I32);
22155 assign( kondT, mk_armg_calculate_condition(cond) );
/* Conditional side-exit to the target; bit 0 set in the destination
   keeps CPSR.T (Thumb state) on. */
22156 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
22157 Ijk_Boring,
22158 IRConst_U32(dst | 1/*CPSR.T*/),
22159 OFFB_R15T ));
/* Fall-through case: continue at the next insn, still in Thumb state. */
22160 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
22161 | 1 /*CPSR.T*/ ));
22162 dres.jk_StopHere = Ijk_Boring;
22163 dres.whatNext = Dis_StopHere;
22164 DIP("b%s.w 0x%x\n", nCC(cond), dst);
22165 goto decode_success;
22169 /* ---------------- (T4) B.W label ---------------- */
22170 /* ... whereas this variant doesn't carry its own condition, so it
22171 has to be either unconditional or the conditional by virtue of
22172 being the last in an IT block. The upside is that there's 4
22173 more bits available for the jump offset, so it has a 16-times
22174 greater branch range than the T3 variant. */
22175 if (INSN0(15,11) == BITS5(1,1,1,1,0)
22176 && INSN1(15,14) == BITS2(1,0)
22177 && INSN1(12,12) == 1) {
22178 if (1) {
22179 UInt bS = INSN0(10,10);
22180 UInt bJ1 = INSN1(13,13);
22181 UInt bJ2 = INSN1(11,11);
/* Per the T4 B/BL encoding, the offset's top bits are stored
   mangled: I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S). */
22182 UInt bI1 = 1 ^ (bJ1 ^ bS);
22183 UInt bI2 = 1 ^ (bJ2 ^ bS);
/* Reassemble (S : I1 : I2 : imm10 : imm11 : 0) into a 25-bit value. */
22184 UInt uimm25
22185 = (bS << (1 + 1 + 10 + 11 + 1))
22186 | (bI1 << (1 + 10 + 11 + 1))
22187 | (bI2 << (10 + 11 + 1))
22188 | (INSN0(9,0) << (11 + 1))
22189 | (INSN1(10,0) << 1);
/* Sign-extend from bit 24 via the left-then-arithmetic-right shift
   idiom (same caveat as the T3 case: assumes arithmetic '>>'). */
22190 uimm25 <<= 7;
22191 Int simm25 = (Int)uimm25;
22192 simm25 >>= 7;
22194 vassert(0 == (guest_R15_curr_instr_notENC & 1));
22195 UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
22197 /* If in an IT block, must be the last insn. */
22198 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
22200 // go uncond
22201 mk_skip_over_T32_if_cond_is_false(condT);
22202 condT = IRTemp_INVALID;
22203 // now uncond
22205 // branch to dst
/* Bit 0 set in the destination keeps CPSR.T (Thumb state) on. */
22206 llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
22207 dres.jk_StopHere = Ijk_Boring;
22208 dres.whatNext = Dis_StopHere;
22209 DIP("b.w 0x%x\n", dst);
22210 goto decode_success;
22214 /* ------------------ TBB, TBH ------------------ */
22215 if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
22216 UInt rN = INSN0(3,0);
22217 UInt rM = INSN1(3,0);
22218 UInt bH = INSN1(4,4);
22219 if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
22220 /* Must be last or not-in IT block */
22221 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
22222 /* Go uncond */
22223 mk_skip_over_T32_if_cond_is_false(condT);
22224 condT = IRTemp_INVALID;
22226 IRExpr* ea
22227 = binop(Iop_Add32,
22228 getIRegT(rN),
22229 bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
22230 : getIRegT(rM));
22232 IRTemp delta = newTemp(Ity_I32);
22233 if (bH) {
22234 assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
22235 } else {
22236 assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
22239 llPutIReg(
22241 binop(Iop_Or32,
22242 binop(Iop_Add32,
22243 getIRegT(15),
22244 binop(Iop_Shl32, mkexpr(delta), mkU8(1))
22246 mkU32(1)
22248 dres.jk_StopHere = Ijk_Boring;
22249 dres.whatNext = Dis_StopHere;
22250 DIP("tb%c [r%u, r%u%s]\n",
22251 bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
22252 goto decode_success;
22256 /* ------------------ UBFX ------------------ */
22257 /* ------------------ SBFX ------------------ */
22258 /* There's also ARM versions of same, but it doesn't seem worth the
22259 hassle to common up the handling (it's only a couple of C
22260 statements). */
22261 if ((INSN0(15,4) == 0xF3C // UBFX
22262 || INSN0(15,4) == 0xF34) // SBFX
22263 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
22264 UInt rN = INSN0(3,0);
22265 UInt rD = INSN1(11,8);
22266 UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
22267 UInt wm1 = INSN1(4,0);
/* wm1 is "width minus one"; the field occupies bits lsb .. msb. */
22268 UInt msb = lsb + wm1;
22269 if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
22270 Bool isU = INSN0(15,4) == 0xF3C;
22271 IRTemp src = newTemp(Ity_I32);
22272 IRTemp tmp = newTemp(Ity_I32);
22273 IRTemp res = newTemp(Ity_I32);
/* mask = 2^(wm1+1) - 1, built as ((1<<wm1)-1) + (1<<wm1) so that no
   single shift exceeds 31 bits; "1 << (wm1+1)" would be undefined
   behaviour when wm1 == 31. */
22274 UInt mask = ((1 << wm1) - 1) + (1 << wm1);
22275 vassert(msb <= 31);
22276 vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
/* Extract the field into the low bits ... */
22278 assign(src, getIRegT(rN));
22279 assign(tmp, binop(Iop_And32,
22280 binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
22281 mkU32(mask)));
/* ... then push the field's top bit to bit 31 and shift back,
   with zero-fill (UBFX) or sign-fill (SBFX). */
22282 assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
22283 binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
22284 mkU8(31-wm1)));
22286 putIRegT(rD, mkexpr(res), condT);
22288 DIP("%s r%u, r%u, #%u, #%u\n",
22289 isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
22290 goto decode_success;
22294 /* ------------------ UXTB ------------------ */
22295 /* ------------------ UXTH ------------------ */
22296 /* ------------------ SXTB ------------------ */
22297 /* ------------------ SXTH ------------------ */
22298 /* ----------------- UXTB16 ----------------- */
22299 /* ----------------- SXTB16 ----------------- */
22300 /* FIXME: this is an exact duplicate of the ARM version. They
22301 should be commoned up. */
22302 if ((INSN0(15,0) == 0xFA5F // UXTB
22303 || INSN0(15,0) == 0xFA1F // UXTH
22304 || INSN0(15,0) == 0xFA4F // SXTB
22305 || INSN0(15,0) == 0xFA0F // SXTH
22306 || INSN0(15,0) == 0xFA3F // UXTB16
22307 || INSN0(15,0) == 0xFA2F) // SXTB16
22308 && INSN1(15,12) == BITS4(1,1,1,1)
22309 && INSN1(7,6) == BITS2(1,0)) {
22310 UInt rD = INSN1(11,8);
22311 UInt rM = INSN1(3,0);
22312 UInt rot = INSN1(5,4);
22313 if (!isBadRegT(rD) && !isBadRegT(rM)) {
22314 const HChar* nm = "???";
22315 IRTemp srcT = newTemp(Ity_I32);
22316 IRTemp rotT = newTemp(Ity_I32);
22317 IRTemp dstT = newTemp(Ity_I32);
22318 assign(srcT, getIRegT(rM));
22319 assign(rotT, genROR32(srcT, 8 * rot));
22320 switch (INSN0(15,0)) {
22321 case 0xFA5F: // UXTB
22322 nm = "uxtb";
22323 assign(dstT, unop(Iop_8Uto32,
22324 unop(Iop_32to8, mkexpr(rotT))));
22325 break;
22326 case 0xFA1F: // UXTH
22327 nm = "uxth";
22328 assign(dstT, unop(Iop_16Uto32,
22329 unop(Iop_32to16, mkexpr(rotT))));
22330 break;
22331 case 0xFA4F: // SXTB
22332 nm = "sxtb";
22333 assign(dstT, unop(Iop_8Sto32,
22334 unop(Iop_32to8, mkexpr(rotT))));
22335 break;
22336 case 0xFA0F: // SXTH
22337 nm = "sxth";
22338 assign(dstT, unop(Iop_16Sto32,
22339 unop(Iop_32to16, mkexpr(rotT))));
22340 break;
22341 case 0xFA3F: // UXTB16
22342 nm = "uxtb16";
22343 assign(dstT, binop(Iop_And32, mkexpr(rotT),
22344 mkU32(0x00FF00FF)));
22345 break;
22346 case 0xFA2F: { // SXTB16
22347 nm = "sxtb16";
22348 IRTemp lo32 = newTemp(Ity_I32);
22349 IRTemp hi32 = newTemp(Ity_I32);
22350 assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
22351 assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
22352 assign(
22353 dstT,
22354 binop(Iop_Or32,
22355 binop(Iop_And32,
22356 unop(Iop_8Sto32,
22357 unop(Iop_32to8, mkexpr(lo32))),
22358 mkU32(0xFFFF)),
22359 binop(Iop_Shl32,
22360 unop(Iop_8Sto32,
22361 unop(Iop_32to8, mkexpr(hi32))),
22362 mkU8(16))
22364 break;
22366 default:
22367 vassert(0);
22369 putIRegT(rD, mkexpr(dstT), condT);
22370 DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
22371 goto decode_success;
22375 /* -------------- MUL.W Rd, Rn, Rm -------------- */
22376 if (INSN0(15,4) == 0xFB0
22377 && (INSN1(15,0) & 0xF0F0) == 0xF000) {
22378 UInt rN = INSN0(3,0);
22379 UInt rD = INSN1(11,8);
22380 UInt rM = INSN1(3,0);
22381 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22382 IRTemp res = newTemp(Ity_I32);
22383 assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
22384 putIRegT(rD, mkexpr(res), condT);
22385 DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
22386 goto decode_success;
22390 /* -------------- SDIV.W Rd, Rn, Rm -------------- */
22391 if (INSN0(15,4) == 0xFB9
22392 && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
22393 UInt rN = INSN0(3,0);
22394 UInt rD = INSN1(11,8);
22395 UInt rM = INSN1(3,0);
22396 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22397 IRTemp res = newTemp(Ity_I32);
22398 IRTemp argL = newTemp(Ity_I32);
22399 IRTemp argR = newTemp(Ity_I32);
22400 assign(argL, getIRegT(rN));
22401 assign(argR, getIRegT(rM));
22402 assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
22403 putIRegT(rD, mkexpr(res), condT);
22404 DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
22405 goto decode_success;
22409 /* -------------- UDIV.W Rd, Rn, Rm -------------- */
22410 if (INSN0(15,4) == 0xFBB
22411 && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
22412 UInt rN = INSN0(3,0);
22413 UInt rD = INSN1(11,8);
22414 UInt rM = INSN1(3,0);
22415 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22416 IRTemp res = newTemp(Ity_I32);
22417 IRTemp argL = newTemp(Ity_I32);
22418 IRTemp argR = newTemp(Ity_I32);
22419 assign(argL, getIRegT(rN));
22420 assign(argR, getIRegT(rM));
22421 assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
22422 putIRegT(rD, mkexpr(res), condT);
22423 DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
22424 goto decode_success;
22428 /* ------------------ {U,S}MULL ------------------ */
22429 if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
22430 && INSN1(7,4) == BITS4(0,0,0,0)) {
22431 UInt isU = INSN0(5,5);
22432 UInt rN = INSN0(3,0);
22433 UInt rDlo = INSN1(15,12);
22434 UInt rDhi = INSN1(11,8);
22435 UInt rM = INSN1(3,0);
22436 if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
22437 && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
22438 IRTemp res = newTemp(Ity_I64);
22439 assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
22440 getIRegT(rN), getIRegT(rM)));
22441 putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
22442 putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
22443 DIP("%cmull r%u, r%u, r%u, r%u\n",
22444 isU ? 'u' : 's', rDlo, rDhi, rN, rM);
22445 goto decode_success;
22449 /* ------------------ ML{A,S} ------------------ */
22450 if (INSN0(15,4) == 0xFB0
22451 && ( INSN1(7,4) == BITS4(0,0,0,0) // MLA
22452 || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
22453 UInt rN = INSN0(3,0);
22454 UInt rA = INSN1(15,12);
22455 UInt rD = INSN1(11,8);
22456 UInt rM = INSN1(3,0);
22457 if (!isBadRegT(rD) && !isBadRegT(rN)
22458 && !isBadRegT(rM) && !isBadRegT(rA)) {
22459 Bool isMLA = INSN1(7,4) == BITS4(0,0,0,0);
22460 IRTemp res = newTemp(Ity_I32);
22461 assign(res,
22462 binop(isMLA ? Iop_Add32 : Iop_Sub32,
22463 getIRegT(rA),
22464 binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
22465 putIRegT(rD, mkexpr(res), condT);
22466 DIP("%s r%u, r%u, r%u, r%u\n",
22467 isMLA ? "mla" : "mls", rD, rN, rM, rA);
22468 goto decode_success;
22472 /* ------------------ (T3) ADR ------------------ */
22473 if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
22474 && INSN1(15,15) == 0) {
22475 /* rD = align4(PC) + imm32 */
22476 UInt rD = INSN1(11,8);
22477 if (!isBadRegT(rD)) {
22478 UInt imm32 = (INSN0(10,10) << 11)
22479 | (INSN1(14,12) << 8) | INSN1(7,0);
22480 putIRegT(rD, binop(Iop_Add32,
22481 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
22482 mkU32(imm32)),
22483 condT);
22484 DIP("add r%u, pc, #%u\n", rD, imm32);
22485 goto decode_success;
22489 /* ----------------- (T1) UMLAL ----------------- */
22490 /* ----------------- (T1) SMLAL ----------------- */
22491 if ((INSN0(15,4) == 0xFBE // UMLAL
22492 || INSN0(15,4) == 0xFBC) // SMLAL
22493 && INSN1(7,4) == BITS4(0,0,0,0)) {
22494 UInt rN = INSN0(3,0);
22495 UInt rDlo = INSN1(15,12);
22496 UInt rDhi = INSN1(11,8);
22497 UInt rM = INSN1(3,0);
22498 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
22499 && !isBadRegT(rM) && rDhi != rDlo) {
22500 Bool isS = INSN0(15,4) == 0xFBC;
22501 IRTemp argL = newTemp(Ity_I32);
22502 IRTemp argR = newTemp(Ity_I32);
22503 IRTemp old = newTemp(Ity_I64);
22504 IRTemp res = newTemp(Ity_I64);
22505 IRTemp resHi = newTemp(Ity_I32);
22506 IRTemp resLo = newTemp(Ity_I32);
22507 IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
22508 assign( argL, getIRegT(rM));
22509 assign( argR, getIRegT(rN));
22510 assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
22511 assign( res, binop(Iop_Add64,
22512 mkexpr(old),
22513 binop(mulOp, mkexpr(argL), mkexpr(argR))) );
22514 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
22515 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
22516 putIRegT( rDhi, mkexpr(resHi), condT );
22517 putIRegT( rDlo, mkexpr(resLo), condT );
22518 DIP("%cmlal r%u, r%u, r%u, r%u\n",
22519 isS ? 's' : 'u', rDlo, rDhi, rN, rM);
22520 goto decode_success;
22524 /* ------------------ (T1) UMAAL ------------------ */
22525 if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
22526 UInt rN = INSN0(3,0);
22527 UInt rDlo = INSN1(15,12);
22528 UInt rDhi = INSN1(11,8);
22529 UInt rM = INSN1(3,0);
22530 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
22531 && !isBadRegT(rM) && rDhi != rDlo) {
22532 IRTemp argN = newTemp(Ity_I32);
22533 IRTemp argM = newTemp(Ity_I32);
22534 IRTemp argDhi = newTemp(Ity_I32);
22535 IRTemp argDlo = newTemp(Ity_I32);
22536 IRTemp res = newTemp(Ity_I64);
22537 IRTemp resHi = newTemp(Ity_I32);
22538 IRTemp resLo = newTemp(Ity_I32);
22539 assign( argN, getIRegT(rN) );
22540 assign( argM, getIRegT(rM) );
22541 assign( argDhi, getIRegT(rDhi) );
22542 assign( argDlo, getIRegT(rDlo) );
22543 assign( res,
22544 binop(Iop_Add64,
22545 binop(Iop_Add64,
22546 binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
22547 unop(Iop_32Uto64, mkexpr(argDhi))),
22548 unop(Iop_32Uto64, mkexpr(argDlo))) );
22549 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
22550 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
22551 putIRegT( rDhi, mkexpr(resHi), condT );
22552 putIRegT( rDlo, mkexpr(resLo), condT );
22553 DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
22554 goto decode_success;
22558 /* ------------------- (T1) SMMUL{R} ------------------ */
22559 if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
22560 && INSN0(6,4) == BITS3(1,0,1)
22561 && INSN1(15,12) == BITS4(1,1,1,1)
22562 && INSN1(7,5) == BITS3(0,0,0)) {
22563 UInt bitR = INSN1(4,4);
22564 UInt rD = INSN1(11,8);
22565 UInt rM = INSN1(3,0);
22566 UInt rN = INSN0(3,0);
22567 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22568 IRExpr* res
22569 = unop(Iop_64HIto32,
22570 binop(Iop_Add64,
22571 binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
22572 mkU64(bitR ? 0x80000000ULL : 0ULL)));
22573 putIRegT(rD, res, condT);
22574 DIP("smmul%s r%u, r%u, r%u\n",
22575 bitR ? "r" : "", rD, rN, rM);
22576 goto decode_success;
22580 /* ------------------- (T1) SMMLA{R} ------------------ */
22581 if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
22582 && INSN0(6,4) == BITS3(1,0,1)
22583 && INSN1(7,5) == BITS3(0,0,0)) {
22584 UInt bitR = INSN1(4,4);
22585 UInt rA = INSN1(15,12);
22586 UInt rD = INSN1(11,8);
22587 UInt rM = INSN1(3,0);
22588 UInt rN = INSN0(3,0);
22589 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
22590 IRExpr* res
22591 = unop(Iop_64HIto32,
22592 binop(Iop_Add64,
22593 binop(Iop_Add64,
22594 binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
22595 binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
22596 mkU64(bitR ? 0x80000000ULL : 0ULL)));
22597 putIRegT(rD, res, condT);
22598 DIP("smmla%s r%u, r%u, r%u, r%u\n",
22599 bitR ? "r" : "", rD, rN, rM, rA);
22600 goto decode_success;
22604 /* ------------------ (T2) ADR ------------------ */
22605 if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
22606 && INSN1(15,15) == 0) {
22607 /* rD = align4(PC) - imm32 */
22608 UInt rD = INSN1(11,8);
22609 if (!isBadRegT(rD)) {
22610 UInt imm32 = (INSN0(10,10) << 11)
22611 | (INSN1(14,12) << 8) | INSN1(7,0);
22612 putIRegT(rD, binop(Iop_Sub32,
22613 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
22614 mkU32(imm32)),
22615 condT);
22616 DIP("sub r%u, pc, #%u\n", rD, imm32);
22617 goto decode_success;
22621 /* ------------------- (T1) BFI ------------------- */
22622 /* ------------------- (T1) BFC ------------------- */
22623 if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
22624 UInt rD = INSN1(11,8);
22625 UInt rN = INSN0(3,0);
22626 UInt msb = INSN1(4,0);
22627 UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
22628 if (isBadRegT(rD) || rN == 13 || msb < lsb) {
22629 /* undecodable; fall through */
22630 } else {
22631 IRTemp src = newTemp(Ity_I32);
22632 IRTemp olddst = newTemp(Ity_I32);
22633 IRTemp newdst = newTemp(Ity_I32);
/* Build a mask of (msb-lsb+1) ones at bit lsb.  2^(msb-lsb+1) - 1 is
   computed as (m - 1) + m with m = 1 << (msb-lsb), so no single shift
   can reach 32 bits (undefined behaviour for the full-width field). */
22634 UInt mask = ((UInt)1) << (msb - lsb);
22635 mask = (mask - 1) + mask;
22636 vassert(mask != 0); // guaranteed by "msb < lsb" check above
22637 mask <<= lsb;
/* rN == 15 encodes BFC: insert zeros rather than bits of rN. */
22639 assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
22640 assign(olddst, getIRegT(rD));
/* newdst = (src << lsb & mask) | (olddst & ~mask): the field from
   src replaces bits lsb..msb of rD; all other bits are preserved. */
22641 assign(newdst,
22642 binop(Iop_Or32,
22643 binop(Iop_And32,
22644 binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
22645 mkU32(mask)),
22646 binop(Iop_And32,
22647 mkexpr(olddst),
22648 mkU32(~mask)))
22651 putIRegT(rD, mkexpr(newdst), condT);
22653 if (rN == 15) {
22654 DIP("bfc r%u, #%u, #%u\n",
22655 rD, lsb, msb-lsb+1);
22656 } else {
22657 DIP("bfi r%u, r%u, #%u, #%u\n",
22658 rD, rN, lsb, msb-lsb+1);
22660 goto decode_success;
22664 /* ------------------- (T1) SXTAH ------------------- */
22665 /* ------------------- (T1) UXTAH ------------------- */
22666 if ((INSN0(15,4) == 0xFA1 // UXTAH
22667 || INSN0(15,4) == 0xFA0) // SXTAH
22668 && INSN1(15,12) == BITS4(1,1,1,1)
22669 && INSN1(7,6) == BITS2(1,0)) {
22670 Bool isU = INSN0(15,4) == 0xFA1;
22671 UInt rN = INSN0(3,0);
22672 UInt rD = INSN1(11,8);
22673 UInt rM = INSN1(3,0);
22674 UInt rot = INSN1(5,4);
22675 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22676 IRTemp srcL = newTemp(Ity_I32);
22677 IRTemp srcR = newTemp(Ity_I32);
22678 IRTemp res = newTemp(Ity_I32);
22679 assign(srcR, getIRegT(rM));
22680 assign(srcL, getIRegT(rN));
22681 assign(res, binop(Iop_Add32,
22682 mkexpr(srcL),
22683 unop(isU ? Iop_16Uto32 : Iop_16Sto32,
22684 unop(Iop_32to16,
22685 genROR32(srcR, 8 * rot)))));
22686 putIRegT(rD, mkexpr(res), condT);
22687 DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
22688 isU ? 'u' : 's', rD, rN, rM, rot);
22689 goto decode_success;
22693 /* ------------------- (T1) SXTAB ------------------- */
22694 /* ------------------- (T1) UXTAB ------------------- */
22695 if ((INSN0(15,4) == 0xFA5 // UXTAB
22696 || INSN0(15,4) == 0xFA4) // SXTAB
22697 && INSN1(15,12) == BITS4(1,1,1,1)
22698 && INSN1(7,6) == BITS2(1,0)) {
22699 Bool isU = INSN0(15,4) == 0xFA5;
22700 UInt rN = INSN0(3,0);
22701 UInt rD = INSN1(11,8);
22702 UInt rM = INSN1(3,0);
22703 UInt rot = INSN1(5,4);
22704 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22705 IRTemp srcL = newTemp(Ity_I32);
22706 IRTemp srcR = newTemp(Ity_I32);
22707 IRTemp res = newTemp(Ity_I32);
22708 assign(srcR, getIRegT(rM));
22709 assign(srcL, getIRegT(rN));
22710 assign(res, binop(Iop_Add32,
22711 mkexpr(srcL),
22712 unop(isU ? Iop_8Uto32 : Iop_8Sto32,
22713 unop(Iop_32to8,
22714 genROR32(srcR, 8 * rot)))));
22715 putIRegT(rD, mkexpr(res), condT);
22716 DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
22717 isU ? 'u' : 's', rD, rN, rM, rot);
22718 goto decode_success;
22722 /* ------------------- (T1) CLZ ------------------- */
22723 if (INSN0(15,4) == 0xFAB
22724 && INSN1(15,12) == BITS4(1,1,1,1)
22725 && INSN1(7,4) == BITS4(1,0,0,0)) {
22726 UInt rM1 = INSN0(3,0);
22727 UInt rD = INSN1(11,8);
22728 UInt rM2 = INSN1(3,0);
22729 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22730 IRTemp arg = newTemp(Ity_I32);
22731 IRTemp res = newTemp(Ity_I32);
22732 assign(arg, getIRegT(rM1));
22733 assign(res, IRExpr_ITE(
22734 binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
22735 mkU32(32),
22736 unop(Iop_Clz32, mkexpr(arg))
22738 putIRegT(rD, mkexpr(res), condT);
22739 DIP("clz r%u, r%u\n", rD, rM1);
22740 goto decode_success;
22744 /* ------------------- (T1) RBIT ------------------- */
22745 if (INSN0(15,4) == 0xFA9
22746 && INSN1(15,12) == BITS4(1,1,1,1)
22747 && INSN1(7,4) == BITS4(1,0,1,0)) {
22748 UInt rM1 = INSN0(3,0);
22749 UInt rD = INSN1(11,8);
22750 UInt rM2 = INSN1(3,0);
22751 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22752 IRTemp arg = newTemp(Ity_I32);
22753 assign(arg, getIRegT(rM1));
22754 IRTemp res = gen_BITREV(arg);
22755 putIRegT(rD, mkexpr(res), condT);
22756 DIP("rbit r%u, r%u\n", rD, rM1);
22757 goto decode_success;
22761 /* ------------------- (T2) REV ------------------- */
22762 /* ------------------- (T2) REV16 ------------------- */
22763 if (INSN0(15,4) == 0xFA9
22764 && INSN1(15,12) == BITS4(1,1,1,1)
22765 && ( INSN1(7,4) == BITS4(1,0,0,0) // REV
22766 || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
22767 UInt rM1 = INSN0(3,0);
22768 UInt rD = INSN1(11,8);
22769 UInt rM2 = INSN1(3,0);
22770 Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
22771 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22772 IRTemp arg = newTemp(Ity_I32);
22773 assign(arg, getIRegT(rM1));
22774 IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
22775 putIRegT(rD, mkexpr(res), condT);
22776 DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
22777 goto decode_success;
22781 /* ------------------- (T2) REVSH ------------------ */
22782 if (INSN0(15,4) == 0xFA9
22783 && INSN1(15,12) == BITS4(1,1,1,1)
22784 && INSN1(7,4) == BITS4(1,0,1,1)) {
22785 UInt rM1 = INSN0(3,0);
22786 UInt rM2 = INSN1(3,0);
22787 UInt rD = INSN1(11,8);
22788 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22789 IRTemp irt_rM = newTemp(Ity_I32);
22790 IRTemp irt_hi = newTemp(Ity_I32);
22791 IRTemp irt_low = newTemp(Ity_I32);
22792 IRTemp irt_res = newTemp(Ity_I32);
22793 assign(irt_rM, getIRegT(rM1));
22794 assign(irt_hi,
22795 binop(Iop_Sar32,
22796 binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
22797 mkU8(16)
22800 assign(irt_low,
22801 binop(Iop_And32,
22802 binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
22803 mkU32(0xFF)
22806 assign(irt_res,
22807 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
22809 putIRegT(rD, mkexpr(irt_res), condT);
22810 DIP("revsh r%u, r%u\n", rD, rM1);
22811 goto decode_success;
22815 /* -------------- (T1) MSR apsr, reg -------------- */
22816 if (INSN0(15,4) == 0xF38
22817 && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
22818 UInt rN = INSN0(3,0);
22819 UInt write_ge = INSN1(10,10);
22820 UInt write_nzcvq = INSN1(11,11);
22821 if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
22822 IRTemp rNt = newTemp(Ity_I32);
22823 assign(rNt, getIRegT(rN));
22824 desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
22825 DIP("msr cpsr_%s%s, r%u\n",
22826 write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
22827 goto decode_success;
22831 /* -------------- (T1) MRS reg, apsr -------------- */
22832 if (INSN0(15,0) == 0xF3EF
22833 && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
22834 UInt rD = INSN1(11,8);
22835 if (!isBadRegT(rD)) {
22836 IRTemp apsr = synthesise_APSR();
22837 putIRegT( rD, mkexpr(apsr), condT );
22838 DIP("mrs r%u, cpsr\n", rD);
22839 goto decode_success;
22843 /* ----------------- (T1) LDREX ----------------- */
22844 if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
22845 UInt rN = INSN0(3,0);
22846 UInt rT = INSN1(15,12);
22847 UInt imm8 = INSN1(7,0);
22848 if (!isBadRegT(rT) && rN != 15) {
22849 IRTemp res;
22850 // go uncond
22851 mk_skip_over_T32_if_cond_is_false( condT );
22852 // now uncond
22853 res = newTemp(Ity_I32);
22854 stmt( IRStmt_LLSC(Iend_LE,
22855 res,
22856 binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
22857 NULL/*this is a load*/ ));
22858 putIRegT(rT, mkexpr(res), IRTemp_INVALID);
22859 DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
22860 goto decode_success;
22864 /* --------------- (T1) LDREX{B,H} --------------- */
22865 if (INSN0(15,4) == 0xE8D
22866 && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
22867 UInt rN = INSN0(3,0);
22868 UInt rT = INSN1(15,12);
22869 Bool isH = INSN1(11,0) == 0xF5F;
22870 if (!isBadRegT(rT) && rN != 15) {
22871 IRTemp res;
22872 // go uncond
22873 mk_skip_over_T32_if_cond_is_false( condT );
22874 // now uncond
22875 res = newTemp(isH ? Ity_I16 : Ity_I8);
22876 stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
22877 NULL/*this is a load*/ ));
22878 putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
22879 IRTemp_INVALID);
22880 DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
22881 goto decode_success;
22885 /* --------------- (T1) LDREXD --------------- */
22886 if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
22887 UInt rN = INSN0(3,0);
22888 UInt rT = INSN1(15,12);
22889 UInt rT2 = INSN1(11,8);
22890 if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
22891 IRTemp res;
22892 // go uncond
22893 mk_skip_over_T32_if_cond_is_false( condT );
22894 // now uncond
22895 res = newTemp(Ity_I64);
22896 // FIXME: assumes little-endian guest
22897 stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
22898 NULL/*this is a load*/ ));
22899 // FIXME: assumes little-endian guest
22900 putIRegT(rT, unop(Iop_64to32, mkexpr(res)), IRTemp_INVALID);
22901 putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
22902 DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
22903 goto decode_success;
22907 /* ----------------- (T1) STREX ----------------- */
22908 if (INSN0(15,4) == 0xE84) {
22909 UInt rN = INSN0(3,0);
22910 UInt rT = INSN1(15,12);
22911 UInt rD = INSN1(11,8);
22912 UInt imm8 = INSN1(7,0);
22913 if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
22914 && rD != rN && rD != rT) {
22915 IRTemp resSC1, resSC32;
22916 // go uncond
22917 mk_skip_over_T32_if_cond_is_false( condT );
22918 // now uncond
22919 /* Ok, now we're unconditional. Do the store. */
22920 resSC1 = newTemp(Ity_I1);
22921 stmt( IRStmt_LLSC(Iend_LE,
22922 resSC1,
22923 binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
22924 getIRegT(rT)) );
22925 /* Set rD to 1 on failure, 0 on success. Currently we have
22926 resSC1 == 0 on failure, 1 on success. */
22927 resSC32 = newTemp(Ity_I32);
22928 assign(resSC32,
22929 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
22930 putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
22931 DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
22932 goto decode_success;
22936 /* --------------- (T1) STREX{B,H} --------------- */
22937 if (INSN0(15,4) == 0xE8C
22938 && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
22939 UInt rN = INSN0(3,0);
22940 UInt rT = INSN1(15,12);
22941 UInt rD = INSN1(3,0);
22942 Bool isH = INSN1(11,4) == 0xF5;
22943 if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
22944 && rD != rN && rD != rT) {
22945 IRTemp resSC1, resSC32;
22946 // go uncond
22947 mk_skip_over_T32_if_cond_is_false( condT );
22948 // now uncond
22949 /* Ok, now we're unconditional. Do the store. */
22950 resSC1 = newTemp(Ity_I1);
22951 stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
22952 unop(isH ? Iop_32to16 : Iop_32to8,
22953 getIRegT(rT))) );
22954 /* Set rD to 1 on failure, 0 on success. Currently we have
22955 resSC1 == 0 on failure, 1 on success. */
22956 resSC32 = newTemp(Ity_I32);
22957 assign(resSC32,
22958 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
22959 putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
22960 DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
22961 goto decode_success;
22965 /* ---------------- (T1) STREXD ---------------- */
22966 if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
22967 UInt rN = INSN0(3,0);
22968 UInt rT = INSN1(15,12);
22969 UInt rT2 = INSN1(11,8);
22970 UInt rD = INSN1(3,0);
22971 if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
22972 && rN != 15 && rD != rN && rD != rT && rD != rT2) {
22973 IRTemp resSC1, resSC32, data;
22974 // go uncond
22975 mk_skip_over_T32_if_cond_is_false( condT );
22976 // now uncond
22977 /* Ok, now we're unconditional. Do the store. */
22978 resSC1 = newTemp(Ity_I1);
22979 data = newTemp(Ity_I64);
22980 // FIXME: assumes little-endian guest
22981 assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
22982 // FIXME: assumes little-endian guest
22983 stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
22984 /* Set rD to 1 on failure, 0 on success. Currently we have
22985 resSC1 == 0 on failure, 1 on success. */
22986 resSC32 = newTemp(Ity_I32);
22987 assign(resSC32,
22988 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
22989 putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
22990 DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
22991 goto decode_success;
22995 /* -------------- v7 barrier insns -------------- */
22996 if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
22997 /* FIXME: should this be unconditional? */
22998 /* XXX this isn't really right, is it? The generated IR does
22999 them unconditionally. I guess it doesn't matter since it
23000 doesn't do any harm to do them even when the guarding
23001 condition is false -- it's just a performance loss. */
23002 switch (INSN1(7,0)) {
23003 case 0x4F: /* DSB sy */
23004 case 0x4E: /* DSB st */
23005 case 0x4B: /* DSB ish */
23006 case 0x4A: /* DSB ishst */
23007 case 0x47: /* DSB nsh */
23008 case 0x46: /* DSB nshst */
23009 case 0x43: /* DSB osh */
23010 case 0x42: /* DSB oshst */
23011 stmt( IRStmt_MBE(Imbe_Fence) );
23012 DIP("DSB\n");
23013 goto decode_success;
23014 case 0x5F: /* DMB sy */
23015 case 0x5E: /* DMB st */
23016 case 0x5B: /* DMB ish */
23017 case 0x5A: /* DMB ishst */
23018 case 0x57: /* DMB nsh */
23019 case 0x56: /* DMB nshst */
23020 case 0x53: /* DMB osh */
23021 case 0x52: /* DMB oshst */
23022 stmt( IRStmt_MBE(Imbe_Fence) );
23023 DIP("DMB\n");
23024 goto decode_success;
23025 case 0x6F: /* ISB */
23026 stmt( IRStmt_MBE(Imbe_Fence) );
23027 DIP("ISB\n");
23028 goto decode_success;
23029 default:
23030 break;
23034 /* ---------------------- PLD{,W} ---------------------- */
23035 if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
23036 /* FIXME: should this be unconditional? */
23037 /* PLD/PLDW immediate, encoding T1 */
23038 UInt rN = INSN0(3,0);
23039 UInt bW = INSN0(5,5);
23040 UInt imm12 = INSN1(11,0);
23041 DIP("pld%s [r%u, #%u]\n", bW ? "w" : "", rN, imm12);
23042 goto decode_success;
23045 if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
23046 /* FIXME: should this be unconditional? */
23047 /* PLD/PLDW immediate, encoding T2 */
23048 UInt rN = INSN0(3,0);
23049 UInt bW = INSN0(5,5);
23050 UInt imm8 = INSN1(7,0);
23051 DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "", rN, imm8);
23052 goto decode_success;
23055 if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
23056 /* FIXME: should this be unconditional? */
23057 /* PLD/PLDW register, encoding T1 */
23058 UInt rN = INSN0(3,0);
23059 UInt rM = INSN1(3,0);
23060 UInt bW = INSN0(5,5);
23061 UInt imm2 = INSN1(5,4);
23062 if (!isBadRegT(rM)) {
23063 DIP("pld%s [r%u, r%u, lsl %u]\n", bW ? "w" : "", rN, rM, imm2);
23064 goto decode_success;
23066 /* fall through */
23069 /* -------------- read CP15 TPIDRURO register ------------- */
23070 /* mrc p15, 0, r0, c13, c0, 3 up to
23071 mrc p15, 0, r14, c13, c0, 3
23073 /* I don't know whether this is really v7-only. But anyway, we
23074 have to support it since arm-linux uses TPIDRURO as a thread
23075 state register. */
23076 if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
23077 UInt rD = INSN1(15,12);
23078 if (!isBadRegT(rD)) {
23079 putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), condT);
23080 DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
23081 goto decode_success;
23083 /* fall through */
23086 /* ------------ read/write CP15 TPIDRURW register ----------- */
23087 /* mcr p15, 0, r0, c13, c0, 2 (r->cr xfer) up to
23088 mcr p15, 0, r14, c13, c0, 2
23090 mrc p15, 0, r0, c13, c0, 2 (rc->r xfer) up to
23091 mrc p15, 0, r14, c13, c0, 2
23093 if ((INSN0(15,0) == 0xEE0D) && (INSN1(11,0) == 0x0F50)) {
23094 UInt rS = INSN1(15,12);
23095 if (!isBadRegT(rS)) {
23096 putMiscReg32(OFFB_TPIDRURW, getIRegT(rS), condT);
23097 DIP("mcr p15,0, r%u, c13, c0, 2\n", rS);
23098 goto decode_success;
23100 /* fall through */
23102 if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F50)) {
23103 UInt rD = INSN1(15,12);
23104 if (!isBadRegT(rD)) {
23105 putIRegT(rD, IRExpr_Get(OFFB_TPIDRURW, Ity_I32), condT);
23106 DIP("mrc p15,0, r%u, c13, c0, 2\n", rD);
23107 goto decode_success;
23109 /* fall through */
23112 /* -------------- read CP15 PMUSRENR register ------------- */
23113 /* mrc p15, 0, r0, c9, c14, 0 up to
23114 mrc p15, 0, r14, c9, c14, 0
23115 See comment on the ARM equivalent of this (above) for details.
23117 if ((INSN0(15,0) == 0xEE19) && (INSN1(11,0) == 0x0F1E)) {
23118 UInt rD = INSN1(15,12);
23119 if (!isBadRegT(rD)) {
23120 putIRegT(rD, mkU32(0), condT);
23121 DIP("mrc p15,0, r%u, c9, c14, 0\n", rD);
23122 goto decode_success;
23124 /* fall through */
23127 /* ------------------- CLREX ------------------ */
23128 if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
23129 /* AFAICS, this simply cancels a (all?) reservations made by a
23130 (any?) preceding LDREX(es). Arrange to hand it through to
23131 the back end. */
23132 mk_skip_over_T32_if_cond_is_false( condT );
23133 stmt( IRStmt_MBE(Imbe_CancelReservation) );
23134 DIP("clrex\n");
23135 goto decode_success;
23138 /* ------------------- NOP ------------------ */
23139 if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
23140 DIP("nop\n");
23141 goto decode_success;
23144 /* -------------- (T1) LDRT reg+#imm8 -------------- */
23145 /* Load Register Unprivileged:
23146 ldrt Rt, [Rn, #imm8]
23148 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
23149 && INSN1(11,8) == BITS4(1,1,1,0)) {
23150 UInt rT = INSN1(15,12);
23151 UInt rN = INSN0(3,0);
23152 UInt imm8 = INSN1(7,0);
23153 Bool valid = True;
23154 if (rN == 15 || isBadRegT(rT)) valid = False;
23155 if (valid) {
23156 put_ITSTATE(old_itstate);
23157 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23158 IRTemp newRt = newTemp(Ity_I32);
23159 loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
23160 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23161 put_ITSTATE(new_itstate);
23162 DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
23163 goto decode_success;
23167 /* -------------- (T1) STRT reg+#imm8 -------------- */
23168 /* Store Register Unprivileged:
23169 strt Rt, [Rn, #imm8]
23171 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
23172 && INSN1(11,8) == BITS4(1,1,1,0)) {
23173 UInt rT = INSN1(15,12);
23174 UInt rN = INSN0(3,0);
23175 UInt imm8 = INSN1(7,0);
23176 Bool valid = True;
23177 if (rN == 15 || isBadRegT(rT)) valid = False;
23178 if (valid) {
23179 put_ITSTATE(old_itstate);
23180 IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23181 storeGuardedLE( address, llGetIReg(rT), condT );
23182 put_ITSTATE(new_itstate);
23183 DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
23184 goto decode_success;
23188 /* -------------- (T1) STRBT reg+#imm8 -------------- */
23189 /* Store Register Byte Unprivileged:
23190 strbt Rt, [Rn, #imm8]
23192 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
23193 && INSN1(11,8) == BITS4(1,1,1,0)) {
23194 UInt rT = INSN1(15,12);
23195 UInt rN = INSN0(3,0);
23196 UInt imm8 = INSN1(7,0);
23197 Bool valid = True;
23198 if (rN == 15 || isBadRegT(rT)) valid = False;
23199 if (valid) {
23200 put_ITSTATE(old_itstate);
23201 IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23202 IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
23203 storeGuardedLE( address, data, condT );
23204 put_ITSTATE(new_itstate);
23205 DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23206 goto decode_success;
23210 /* -------------- (T1) LDRHT reg+#imm8 -------------- */
23211 /* Load Register Halfword Unprivileged:
23212 ldrht Rt, [Rn, #imm8]
23214 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
23215 && INSN1(11,8) == BITS4(1,1,1,0)) {
23216 UInt rN = INSN0(3,0);
23217 Bool valid = True;
23218 if (rN == 15) {
23219 /* In this case our instruction is LDRH (literal), in fact:
23220 LDRH (literal) was realized earlier, so we don't want to
23221 make it twice. */
23222 valid = False;
23224 UInt rT = INSN1(15,12);
23225 UInt imm8 = INSN1(7,0);
23226 if (isBadRegT(rT)) valid = False;
23227 if (valid) {
23228 put_ITSTATE(old_itstate);
23229 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23230 IRTemp newRt = newTemp(Ity_I32);
23231 loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
23232 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23233 put_ITSTATE(new_itstate);
23234 DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
23235 goto decode_success;
23239 /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
23240 /* Load Register Signed Halfword Unprivileged:
23241 ldrsht Rt, [Rn, #imm8]
23243 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
23244 && INSN1(11,8) == BITS4(1,1,1,0)) {
23245 UInt rN = INSN0(3,0);
23246 Bool valid = True;
23247 if (rN == 15) {
23248 /* In this case our instruction is LDRSH (literal), in fact:
23249 LDRSH (literal) was realized earlier, so we don't want to
23250 make it twice. */
23251 valid = False;
23253 UInt rT = INSN1(15,12);
23254 UInt imm8 = INSN1(7,0);
23255 if (isBadRegT(rT)) valid = False;
23256 if (valid) {
23257 put_ITSTATE(old_itstate);
23258 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23259 IRTemp newRt = newTemp(Ity_I32);
23260 loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
23261 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23262 put_ITSTATE(new_itstate);
23263 DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
23264 goto decode_success;
23268 /* -------------- (T1) STRHT reg+#imm8 -------------- */
23269 /* Store Register Halfword Unprivileged:
23270 strht Rt, [Rn, #imm8]
23272 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
23273 && INSN1(11,8) == BITS4(1,1,1,0)) {
23274 UInt rT = INSN1(15,12);
23275 UInt rN = INSN0(3,0);
23276 UInt imm8 = INSN1(7,0);
23277 Bool valid = True;
23278 if (rN == 15 || isBadRegT(rT)) valid = False;
23279 if (valid) {
23280 put_ITSTATE(old_itstate);
23281 IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23282 IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
23283 storeGuardedLE( address, data, condT );
23284 put_ITSTATE(new_itstate);
23285 DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
23286 goto decode_success;
23290 /* -------------- (T1) LDRBT reg+#imm8 -------------- */
23291 /* Load Register Byte Unprivileged:
23292 ldrbt Rt, [Rn, #imm8]
23294 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
23295 && INSN1(11,8) == BITS4(1,1,1,0)) {
23296 UInt rN = INSN0(3,0);
23297 UInt rT = INSN1(15,12);
23298 UInt imm8 = INSN1(7,0);
23299 Bool valid = True;
23300 if (rN == 15 /* insn is LDRB (literal) */) valid = False;
23301 if (isBadRegT(rT)) valid = False;
23302 if (valid) {
23303 put_ITSTATE(old_itstate);
23304 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23305 IRTemp newRt = newTemp(Ity_I32);
23306 loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
23307 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23308 put_ITSTATE(new_itstate);
23309 DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23310 goto decode_success;
23314 /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
23315 /* Load Register Signed Byte Unprivileged:
23316 ldrsbt Rt, [Rn, #imm8]
23318 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
23319 && INSN1(11,8) == BITS4(1,1,1,0)) {
23320 UInt rN = INSN0(3,0);
23321 Bool valid = True;
23322 UInt rT = INSN1(15,12);
23323 UInt imm8 = INSN1(7,0);
23324 if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
23325 if (isBadRegT(rT)) valid = False;
23326 if (valid) {
23327 put_ITSTATE(old_itstate);
23328 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23329 IRTemp newRt = newTemp(Ity_I32);
23330 loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
23331 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23332 put_ITSTATE(new_itstate);
23333 DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23334 goto decode_success;
23338 /* -------------- (T1) PLI reg+#imm12 -------------- */
23339 /* Preload Instruction:
23340 pli [Rn, #imm12]
23342 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
23343 && INSN1(15,12) == BITS4(1,1,1,1)) {
23344 UInt rN = INSN0(3,0);
23345 UInt imm12 = INSN1(11,0);
23346 if (rN != 15) {
23347 DIP("pli [r%u, #%u]\n", rN, imm12);
23348 goto decode_success;
23352 /* -------------- (T2) PLI reg-#imm8 -------------- */
23353 /* Preload Instruction:
23354 pli [Rn, #-imm8]
23356 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
23357 && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
23358 UInt rN = INSN0(3,0);
23359 UInt imm8 = INSN1(7,0);
23360 if (rN != 15) {
23361 DIP("pli [r%u, #-%u]\n", rN, imm8);
23362 goto decode_success;
23366 /* -------------- (T3) PLI PC+/-#imm12 -------------- */
23367 /* Preload Instruction:
23368 pli [PC, #+/-imm12]
23370 if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
23371 && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
23372 && INSN1(15,12) == BITS4(1,1,1,1)) {
23373 UInt imm12 = INSN1(11,0);
23374 UInt bU = INSN0(7,7);
23375 DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
23376 goto decode_success;
23379 /* ----------------------------------------------------------- */
23380 /* -- VFP (CP 10, CP 11) instructions (in Thumb mode) -- */
23381 /* ----------------------------------------------------------- */
23383 if (INSN0(15,12) == BITS4(1,1,1,0)) {
23384 UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
23385 Bool ok_vfp = decode_CP10_CP11_instruction (
23386 &dres, insn28, condT, ARMCondAL/*bogus*/,
23387 True/*isT*/
23389 if (ok_vfp)
23390 goto decode_success;
23393 /* ----------------------------------------------------------- */
23394 /* -- NEON instructions (only v7 and below, in Thumb mode) -- */
23395 /* ----------------------------------------------------------- */
23397 if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
23398 UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23399 Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
23400 &dres, insn32, condT, True/*isT*/
23402 if (ok_neon)
23403 goto decode_success;
23406 /* ----------------------------------------------------------- */
23407 /* -- v6 media instructions (in Thumb mode) -- */
23408 /* ----------------------------------------------------------- */
23410 { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23411 Bool ok_v6m = decode_V6MEDIA_instruction(
23412 &dres, insn32, condT, ARMCondAL/*bogus*/,
23413 True/*isT*/
23415 if (ok_v6m)
23416 goto decode_success;
23419 /* ----------------------------------------------------------- */
23420 /* -- v8 instructions (in Thumb mode) -- */
23421 /* ----------------------------------------------------------- */
23423 /* If we get here, it means that all attempts to decode the
23424 instruction as ARMv7 or earlier have failed. So, if we're doing
23425 ARMv8 or later, here is the point to try for it. */
23427 if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
23428 UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23429 Bool ok_v8
23430 = decode_V8_instruction( &dres, insn32, condT, True/*isT*/,
23431 old_itstate, new_itstate );
23432 if (ok_v8)
23433 goto decode_success;
23436 /* ----------------------------------------------------------- */
23437 /* -- Undecodable -- */
23438 /* ----------------------------------------------------------- */
23440 goto decode_failure;
23441 /*NOTREACHED*/
23443 decode_failure:
23444 /* All decode failures end up here. */
23445 if (sigill_diag)
23446 vex_printf("disInstr(thumb): unhandled instruction: "
23447 "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
23449 /* Back up ITSTATE to the initial value for this instruction.
23450 If we don't do that, any subsequent restart of the instruction
23451 will restart with the wrong value. */
23452 if (old_itstate != IRTemp_INVALID)
23453 put_ITSTATE(old_itstate);
23455 /* Tell the dispatcher that this insn cannot be decoded, and so has
23456 not been executed, and (is currently) the next to be executed.
23457 R15 should be up-to-date since it made so at the start of each
23458 insn, but nevertheless be paranoid and update it again right
23459 now. */
23460 vassert(0 == (guest_R15_curr_instr_notENC & 1));
23461 llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
23462 dres.len = 0;
23463 dres.whatNext = Dis_StopHere;
23464 dres.jk_StopHere = Ijk_NoDecode;
23465 return dres;
23467 decode_success:
23468 /* All decode successes end up here. */
23469 vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
23470 switch (dres.whatNext) {
23471 case Dis_Continue:
23472 llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
23473 break;
23474 case Dis_StopHere:
23475 break;
23476 default:
23477 vassert(0);
23480 DIP("\n");
23482 return dres;
23484 # undef INSN0
23485 # undef INSN1
23488 #undef DIP
23489 #undef DIS
23492 /* Helper table for figuring out how many insns an IT insn
23493 conditionalises.
23495 An ITxyz instruction of the format "1011 1111 firstcond mask"
23496 conditionalises some number of instructions, as indicated by the
23497 following table. A value of zero indicates the instruction is
23498 invalid in some way.
23500 mask = 0 means this isn't an IT instruction
23501 fc = 15 (NV) means unpredictable
23503 The line fc = 14 (AL) is different from the others; there are
23504 additional constraints in this case.
23506 mask(0 .. 15)
23507 +--------------------------------
23508 fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23509 .. | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23510 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23511 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23512 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23513 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23514 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23515 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23516 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23517 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23518 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23519 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23520 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23521 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23522 | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
23523 15) | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
23525 To be conservative with the analysis, let's rule out the mask = 0
23526 case, since that isn't an IT insn at all. But for all the other
23527 cases where the table contains zero, that means unpredictable, so
23528 let's say 4 to be conservative. Hence we have a safe value for any
23529 IT (mask,fc) pair that the CPU would actually identify as an IT
23530 instruction. The final table is
23532 mask(0 .. 15)
23533 +--------------------------------
23534 fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23535 .. | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23536 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23537 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23538 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23539 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23540 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23541 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23542 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23543 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23544 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23545 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23546 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23547 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23548 | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
        15)  | 0 4 4 4  4 4 4 4  4 4 4 4  4 4 4 4
*/
23551 static const UChar it_length_table[256]
23552 = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23553 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23554 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23555 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23556 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23557 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23558 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23559 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23560 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23561 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23562 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23563 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23564 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23565 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23566 0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
23567 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
23571 /*------------------------------------------------------------*/
23572 /*--- Top-level fn ---*/
23573 /*------------------------------------------------------------*/
23575 /* Disassemble a single instruction into IR. The instruction
23576 is located in host memory at &guest_code[delta]. */
23578 DisResult disInstr_ARM ( IRSB* irsb_IN,
23579 const UChar* guest_code_IN,
23580 Long delta_ENCODED,
23581 Addr guest_IP_ENCODED,
23582 VexArch guest_arch,
23583 const VexArchInfo* archinfo,
23584 const VexAbiInfo* abiinfo,
23585 VexEndness host_endness_IN,
23586 Bool sigill_diag_IN )
23588 DisResult dres;
23589 Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
23591 /* Set globals (see top of this file) */
23592 vassert(guest_arch == VexArchARM);
23594 irsb = irsb_IN;
23595 host_endness = host_endness_IN;
23596 __curr_is_Thumb = isThumb;
23598 if (isThumb) {
23599 guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
23600 } else {
23601 guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
23604 if (isThumb) {
23605 dres = disInstr_THUMB_WRK ( &guest_code_IN[delta_ENCODED - 1],
23606 archinfo, abiinfo, sigill_diag_IN );
23607 } else {
23608 dres = disInstr_ARM_WRK ( &guest_code_IN[delta_ENCODED],
23609 archinfo, abiinfo, sigill_diag_IN );
23612 return dres;
23615 /* Test program for the conversion of IRCmpF64Result values to VFP
23616 nzcv values. See handling of FCMPD et al above. */
23618 UInt foo ( UInt x )
23620 UInt ix = ((x >> 5) & 3) | (x & 1);
23621 UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
23622 UInt termR = (ix & (ix >> 1) & 1);
23623 return termL - termR;
23626 void try ( char* s, UInt ir, UInt req )
23628 UInt act = foo(ir);
23629 printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
23630 s, ir, (req >> 3) & 1, (req >> 2) & 1,
23631 (req >> 1) & 1, (req >> 0) & 1,
23632 (act >> 3) & 1, (act >> 2) & 1,
23633 (act >> 1) & 1, (act >> 0) & 1, act);
/* Exercise foo() on the four defined comparison outcomes.  The
   expected nzcv values are written in hex rather than 0b literals:
   binary constants are a GNU extension (standard only from C23), so
   this keeps the test program valid standard C.  Values unchanged. */
int main ( void )
{
   printf("\n");
   try("UN", 0x45, 0x3);   /* nzcv = 0011 */
   try("LT", 0x01, 0x8);   /* nzcv = 1000 */
   try("GT", 0x00, 0x2);   /* nzcv = 0010 */
   try("EQ", 0x40, 0x6);   /* nzcv = 0110 */
   printf("\n");
   return 0;
}
23649 /* Spare code for doing reference implementations of various 64-bit
23650 SIMD interleaves/deinterleaves/concatenation ops. */
23652 // Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
23653 // the top halves guaranteed to be zero.
23654 static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
23655 IRTemp* out0, IRTemp v64 )
23657 if (out3) *out3 = newTemp(Ity_I32);
23658 if (out2) *out2 = newTemp(Ity_I32);
23659 if (out1) *out1 = newTemp(Ity_I32);
23660 if (out0) *out0 = newTemp(Ity_I32);
23661 IRTemp hi32 = newTemp(Ity_I32);
23662 IRTemp lo32 = newTemp(Ity_I32);
23663 assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
23664 assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
23665 if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
23666 if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
23667 if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
23668 if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
23671 // Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
23672 // IRTemp.
23673 static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
23675 IRTemp hi32 = newTemp(Ity_I32);
23676 IRTemp lo32 = newTemp(Ity_I32);
23677 assign(hi32,
23678 binop(Iop_Or32,
23679 binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
23680 binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
23681 assign(lo32,
23682 binop(Iop_Or32,
23683 binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
23684 binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
23685 IRTemp res = newTemp(Ity_I64);
23686 assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
23687 return res;
23690 static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
23692 // returns a1 b1 a0 b0
23693 IRTemp a1, a0, b1, b0;
23694 break64to16s(NULL, NULL, &a1, &a0, a3210);
23695 break64to16s(NULL, NULL, &b1, &b0, b3210);
23696 return mkexpr(mk64from16s(a1, b1, a0, b0));
23699 static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
23701 // returns a3 b3 a2 b2
23702 IRTemp a3, a2, b3, b2;
23703 break64to16s(&a3, &a2, NULL, NULL, a3210);
23704 break64to16s(&b3, &b2, NULL, NULL, b3210);
23705 return mkexpr(mk64from16s(a3, b3, a2, b2));
23708 static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23710 // returns a2 a0 b2 b0
23711 IRTemp a2, a0, b2, b0;
23712 break64to16s(NULL, &a2, NULL, &a0, a3210);
23713 break64to16s(NULL, &b2, NULL, &b0, b3210);
23714 return mkexpr(mk64from16s(a2, a0, b2, b0));
23717 static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23719 // returns a3 a1 b3 b1
23720 IRTemp a3, a1, b3, b1;
23721 break64to16s(&a3, NULL, &a1, NULL, a3210);
23722 break64to16s(&b3, NULL, &b1, NULL, b3210);
23723 return mkexpr(mk64from16s(a3, a1, b3, b1));
23726 static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23728 // returns a3 b3 a1 b1
23729 IRTemp a3, b3, a1, b1;
23730 break64to16s(&a3, NULL, &a1, NULL, a3210);
23731 break64to16s(&b3, NULL, &b1, NULL, b3210);
23732 return mkexpr(mk64from16s(a3, b3, a1, b1));
23735 static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23737 // returns a2 b2 a0 b0
23738 IRTemp a2, b2, a0, b0;
23739 break64to16s(NULL, &a2, NULL, &a0, a3210);
23740 break64to16s(NULL, &b2, NULL, &b0, b3210);
23741 return mkexpr(mk64from16s(a2, b2, a0, b0));
23744 static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
23745 IRTemp* out4, IRTemp* out3, IRTemp* out2,
23746 IRTemp* out1,IRTemp* out0, IRTemp v64 )
23748 if (out7) *out7 = newTemp(Ity_I32);
23749 if (out6) *out6 = newTemp(Ity_I32);
23750 if (out5) *out5 = newTemp(Ity_I32);
23751 if (out4) *out4 = newTemp(Ity_I32);
23752 if (out3) *out3 = newTemp(Ity_I32);
23753 if (out2) *out2 = newTemp(Ity_I32);
23754 if (out1) *out1 = newTemp(Ity_I32);
23755 if (out0) *out0 = newTemp(Ity_I32);
23756 IRTemp hi32 = newTemp(Ity_I32);
23757 IRTemp lo32 = newTemp(Ity_I32);
23758 assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
23759 assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
23760 if (out7)
23761 assign(*out7, binop(Iop_And32,
23762 binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
23763 mkU32(0xFF)));
23764 if (out6)
23765 assign(*out6, binop(Iop_And32,
23766 binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
23767 mkU32(0xFF)));
23768 if (out5)
23769 assign(*out5, binop(Iop_And32,
23770 binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
23771 mkU32(0xFF)));
23772 if (out4)
23773 assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
23774 if (out3)
23775 assign(*out3, binop(Iop_And32,
23776 binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
23777 mkU32(0xFF)));
23778 if (out2)
23779 assign(*out2, binop(Iop_And32,
23780 binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
23781 mkU32(0xFF)));
23782 if (out1)
23783 assign(*out1, binop(Iop_And32,
23784 binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
23785 mkU32(0xFF)));
23786 if (out0)
23787 assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
23790 static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
23791 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
23793 IRTemp hi32 = newTemp(Ity_I32);
23794 IRTemp lo32 = newTemp(Ity_I32);
23795 assign(hi32,
23796 binop(Iop_Or32,
23797 binop(Iop_Or32,
23798 binop(Iop_Shl32,
23799 binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
23800 mkU8(24)),
23801 binop(Iop_Shl32,
23802 binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
23803 mkU8(16))),
23804 binop(Iop_Or32,
23805 binop(Iop_Shl32,
23806 binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
23807 binop(Iop_And32,
23808 mkexpr(in4), mkU32(0xFF)))));
23809 assign(lo32,
23810 binop(Iop_Or32,
23811 binop(Iop_Or32,
23812 binop(Iop_Shl32,
23813 binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
23814 mkU8(24)),
23815 binop(Iop_Shl32,
23816 binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
23817 mkU8(16))),
23818 binop(Iop_Or32,
23819 binop(Iop_Shl32,
23820 binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
23821 binop(Iop_And32,
23822 mkexpr(in0), mkU32(0xFF)))));
23823 IRTemp res = newTemp(Ity_I64);
23824 assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
23825 return res;
23828 static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
23830 // returns a3 b3 a2 b2 a1 b1 a0 b0
23831 IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
23832 break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
23833 break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
23834 return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
23837 static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
23839 // returns a7 b7 a6 b6 a5 b5 a4 b4
23840 IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
23841 break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
23842 break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
23843 return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
23846 static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23848 // returns a6 a4 a2 a0 b6 b4 b2 b0
23849 IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
23850 break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
23851 break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
23852 return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
23855 static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23857 // returns a7 a5 a3 a1 b7 b5 b3 b1
23858 IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
23859 break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
23860 break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
23861 return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
23864 static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23866 // returns a6 b6 a4 b4 a2 b2 a0 b0
23867 IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
23868 break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
23869 break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
23870 return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
23873 static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23875 // returns a7 b7 a5 b5 a3 b3 a1 b1
23876 IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
23877 break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
23878 break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
23879 return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
23882 static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
23884 // returns a0 b0
23885 return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
23886 unop(Iop_64to32, mkexpr(b10)));
23889 static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
23891 // returns a1 b1
23892 return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
23893 unop(Iop_64HIto32, mkexpr(b10)));
23897 /*--------------------------------------------------------------------*/
23898 /*--- end guest_arm_toIR.c ---*/
23899 /*--------------------------------------------------------------------*/