/* Provenance of this copy:
   Bug 439685 — compiler warning in callgrind/main.c
   [valgrind.git] / VEX / priv / guest_arm_toIR.c
   blob 6027d477e427fc3383a31ff3d9eb24ee648dfa5d */
2 /*--------------------------------------------------------------------*/
3 /*--- begin guest_arm_toIR.c ---*/
4 /*--------------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 NEON support is
14 Copyright (C) 2010-2017 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, see <http://www.gnu.org/licenses/>.
31 The GNU General Public License is contained in the file COPYING.
34 /* XXXX thumb to check:
35 that all cases where putIRegT writes r15, we generate a jump.
37 All uses of newTemp assign to an IRTemp and not a UInt
39 For all thumb loads and stores, including VFP ones, new-ITSTATE is
40 backed out before the memory op, and restored afterwards. This
41 needs to happen even after we go uncond. (and for sure it doesn't
42 happen for VFP loads/stores right now).
44 VFP on thumb: check that we exclude all r13/r15 cases that we
45 should.
47 XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
48 taking into account the number of insns guarded by an IT.
50 remove the nasty hack, in the spechelper, of looking for Or32(...,
51 0xE0) in as the first arg to armg_calculate_condition, and instead
52 use Slice44 as specified in comments in the spechelper.
54 add specialisations for armg_calculate_flag_c and _v, as they
55 are moderately often needed in Thumb code.
57 Correctness: ITSTATE handling in Thumb SVCs is wrong.
59 Correctness (obscure): in m_transtab, when invalidating code
60 address ranges, invalidate up to 18 bytes after the end of the
61 range. This is because the ITSTATE optimisation at the top of
62 _THUMB_WRK below analyses up to 18 bytes before the start of any
63 given instruction, and so might depend on the invalidated area.
66 /* Limitations, etc
68 - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
69 These instructions are non-restartable in the case where the
70 transfer(s) fault.
72 - SWP: the restart jump back is Ijk_Boring; it should be
73 Ijk_NoRedir but that's expensive. See comments on casLE() in
74 guest_x86_toIR.c.
77 /* "Special" instructions.
79 This instruction decoder can decode four special instructions
80 which mean nothing natively (are no-ops as far as regs/mem are
81 concerned) but have meaning for supporting Valgrind. A special
82 instruction is flagged by a 16-byte preamble:
84 E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
85 (mov r12, r12, ROR #3; mov r12, r12, ROR #13;
86 mov r12, r12, ROR #29; mov r12, r12, ROR #19)
88 Following that, one of the following 3 are allowed
89 (standard interpretation in parentheses):
91 E18AA00A (orr r10,r10,r10) R3 = client_request ( R4 )
92 E18BB00B (orr r11,r11,r11) R3 = guest_NRADDR
93 E18CC00C (orr r12,r12,r12) branch-and-link-to-noredir R4
94 E1899009 (orr r9,r9,r9) IR injection
96 Any other bytes following the 16-byte preamble are illegal and
97 constitute a failure in instruction decoding. This all assumes
98 that the preamble will never occur except in specific code
99 fragments designed for Valgrind to catch.
102 /* Translates ARM(v5) code to IR. */
104 #include "libvex_basictypes.h"
105 #include "libvex_ir.h"
106 #include "libvex.h"
107 #include "libvex_guest_arm.h"
109 #include "main_util.h"
110 #include "main_globals.h"
111 #include "guest_generic_bb_to_IR.h"
112 #include "guest_arm_defs.h"
/*------------------------------------------------------------*/
/*--- Globals                                              ---*/
/*------------------------------------------------------------*/

/* These are set at the start of the translation of an instruction, so
   that we don't have to pass them around endlessly.  CONST means does
   not change during translation of the instruction. */

/* CONST: what is the host's endianness?  This has to do with float vs
   double register accesses on VFP, but it's complex and not properly
   thought out. */
static VexEndness host_endness;

/* CONST: The guest address for the instruction currently being
   translated.  This is the real, "decoded" address (not subject
   to the CPSR.T kludge). */
static Addr32 guest_R15_curr_instr_notENC;

/* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
   insn is Thumb (True) or ARM (False).
   NOTE(review): the double-underscore prefix is a reserved identifier
   in C; left unchanged because the ASSERT_IS_THUMB/ASSERT_IS_ARM
   macros elsewhere in this file reference this exact name. */
static Bool __curr_is_Thumb;
138 /* MOD: The IRSB* into which we're generating code. */
139 static IRSB* irsb;
141 /* These are to do with handling writes to r15. They are initially
142 set at the start of disInstr_ARM_WRK to indicate no update,
143 possibly updated during the routine, and examined again at the end.
144 If they have been set to indicate a r15 update then a jump is
145 generated. Note, "explicit" jumps (b, bx, etc) are generated
146 directly, not using this mechanism -- this is intended to handle
147 the implicit-style jumps resulting from (eg) assigning to r15 as
148 the result of insns we wouldn't normally consider branchy. */
150 /* MOD. Initially False; set to True iff abovementioned handling is
151 required. */
152 static Bool r15written;
154 /* MOD. Initially IRTemp_INVALID. If the r15 branch to be generated
155 is conditional, this holds the gating IRTemp :: Ity_I32. If the
156 branch to be generated is unconditional, this remains
157 IRTemp_INVALID. */
158 static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
160 /* MOD. Initially Ijk_Boring. If an r15 branch is to be generated,
161 this holds the jump kind. */
162 static IRTemp r15kind;
/*------------------------------------------------------------*/
/*--- Debugging output                                     ---*/
/*------------------------------------------------------------*/

/* Trace-print helpers, active only when front-end tracing is enabled.
   Wrapped in do/while(0) so each macro behaves as a single statement;
   the previous bare-'if' expansion was a dangling-else hazard when
   used in an unbraced if/else body. */
#define DIP(format, args...) \
   do { \
      if (vex_traceflags & VEX_TRACE_FE) \
         vex_printf(format, ## args); \
   } while (0)

#define DIS(buf, format, args...) \
   do { \
      if (vex_traceflags & VEX_TRACE_FE) \
         vex_sprintf(buf, format, ## args); \
   } while (0)

/* Assert that the insn currently being translated is Thumb
   (respectively ARM); both rely on the __curr_is_Thumb global. */
#define ASSERT_IS_THUMB \
   do { vassert(__curr_is_Thumb); } while (0)

#define ASSERT_IS_ARM \
   do { vassert(! __curr_is_Thumb); } while (0)
/*------------------------------------------------------------*/
/*--- Helper bits and pieces for deconstructing the        ---*/
/*--- arm insn stream.                                     ---*/
/*------------------------------------------------------------*/

/* Do a little-endian load of a 32-bit word, regardless of the
   endianness of the underlying host. */
static inline UInt getUIntLittleEndianly ( const UChar* p )
{
   return ((UInt)p[0])
          | (((UInt)p[1]) << 8)
          | (((UInt)p[2]) << 16)
          | (((UInt)p[3]) << 24);
}
/* Do a little-endian load of a 16-bit halfword, regardless of the
   endianness of the underlying host. */
static inline UShort getUShortLittleEndianly ( const UChar* p )
{
   UShort lo = p[0];
   UShort hi = p[1];
   return (UShort)((hi << 8) | lo);
}
/* Rotate the 32-bit word 'x' right by 'sh' places; 'sh' must be in
   0 .. 31, and a rotation by 0 returns 'x' unchanged (the two-shift
   form below would be UB for sh == 0, hence the special case). */
static UInt ROR32 ( UInt x, UInt sh ) {
   /* 'sh' is unsigned, so the previous 'sh >= 0 &&' conjunct was
      vacuously true and provoked -Wtype-limits compiler warnings. */
   vassert(sh < 32);
   if (sh == 0)
      return x;
   else
      return (x << (32-sh)) | (x >> sh);
}
/* Count the number of set bits in 'x', using Kernighan's method:
   each iteration clears the lowest set bit. */
static Int popcount32 ( UInt x )
{
   Int n = 0;
   while (x != 0) {
      x &= x - 1;
      n++;
   }
   return n;
}
/* Return 'x' with bit 'ix' replaced by the least significant bit of
   'b'; other bits of 'b' are ignored, as before. */
static UInt setbit32 ( UInt x, Int ix, UInt b )
{
   /* Use an unsigned constant: '1 << 31' on a signed int is undefined
      behaviour in C, and ix == 31 is a legitimate argument here. */
   UInt mask = 1U << ix;
   x &= ~mask;
   x |= ((b << ix) & mask);
   return x;
}
/* Build small bit-field constants from individual bit values: each
   BITSn macro concatenates its n arguments (each 0 or 1) into an
   n-bit value, most significant bit first.  Used throughout the
   decoder to spell out instruction-encoding patterns readably. */

#define BITS2(_b1,_b0) \
   (((_b1) << 1) | (_b0))

#define BITS3(_b2,_b1,_b0) \
  (((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS4(_b3,_b2,_b1,_b0) \
   (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))

#define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
    | BITS4((_b3),(_b2),(_b1),(_b0)))

/* BITS5/6/7 are defined in terms of BITS8 with leading zeroes. */
#define BITS5(_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
#define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   (((_b9) << 9) | ((_b8) << 8) \
    | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
   ( ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8) \
     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

#define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0) \
   ( ((_b11) << 11) | ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8) \
     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))

/* produces _uint[_bMax:_bMin] — i.e. extract the inclusive bit range
   _bMax down to _bMin of _uint, right-justified.  The 1ULL widening
   keeps the mask computation safe for _bMax == 31. */
#define SLICE_UInt(_uint,_bMax,_bMin) \
   (( ((UInt)(_uint)) >> (_bMin)) \
    & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
279 /*------------------------------------------------------------*/
280 /*--- Helper bits and pieces for creating IR fragments. ---*/
281 /*------------------------------------------------------------*/
283 static IRExpr* mkU64 ( ULong i )
285 return IRExpr_Const(IRConst_U64(i));
288 static IRExpr* mkU32 ( UInt i )
290 return IRExpr_Const(IRConst_U32(i));
293 static IRExpr* mkU8 ( UInt i )
295 vassert(i < 256);
296 return IRExpr_Const(IRConst_U8( (UChar)i ));
299 static IRExpr* mkexpr ( IRTemp tmp )
301 return IRExpr_RdTmp(tmp);
304 static IRExpr* unop ( IROp op, IRExpr* a )
306 return IRExpr_Unop(op, a);
309 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
311 return IRExpr_Binop(op, a1, a2);
314 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
316 return IRExpr_Triop(op, a1, a2, a3);
319 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
321 return IRExpr_Load(Iend_LE, ty, addr);
324 /* Add a statement to the list held by "irbb". */
325 static void stmt ( IRStmt* st )
327 addStmtToIRSB( irsb, st );
330 static void assign ( IRTemp dst, IRExpr* e )
332 stmt( IRStmt_WrTmp(dst, e) );
335 static void storeLE ( IRExpr* addr, IRExpr* data )
337 stmt( IRStmt_Store(Iend_LE, addr, data) );
340 static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
342 if (guardT == IRTemp_INVALID) {
343 /* unconditional */
344 storeLE(addr, data);
345 } else {
346 stmt( IRStmt_StoreG(Iend_LE, addr, data,
347 binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
351 static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
352 IRExpr* addr, IRExpr* alt,
353 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
355 if (guardT == IRTemp_INVALID) {
356 /* unconditional */
357 IRExpr* loaded = NULL;
358 switch (cvt) {
359 case ILGop_Ident32:
360 loaded = loadLE(Ity_I32, addr); break;
361 case ILGop_8Uto32:
362 loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
363 case ILGop_8Sto32:
364 loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
365 case ILGop_16Uto32:
366 loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
367 case ILGop_16Sto32:
368 loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
369 default:
370 vassert(0);
372 vassert(loaded != NULL);
373 assign(dst, loaded);
374 } else {
375 /* Generate a guarded load into 'dst', but apply 'cvt' to the
376 loaded data before putting the data in 'dst'. If the load
377 does not take place, 'alt' is placed directly in 'dst'. */
378 stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
379 binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
383 /* Generate a new temporary of the given type. */
384 static IRTemp newTemp ( IRType ty )
386 vassert(isPlausibleIRType(ty));
387 return newIRTemp( irsb->tyenv, ty );
390 /* Produces a value in 0 .. 3, which is encoded as per the type
391 IRRoundingMode. */
392 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
394 return mkU32(Irrm_NEAREST);
397 /* Generate an expression for SRC rotated right by ROT. */
398 static IRExpr* genROR32( IRTemp src, Int rot )
400 vassert(rot >= 0 && rot < 32);
401 if (rot == 0)
402 return mkexpr(src);
403 return
404 binop(Iop_Or32,
405 binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
406 binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
409 static IRExpr* mkU128 ( ULong i )
411 return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
414 /* Generate a 4-aligned version of the given expression if
415 the given condition is true. Else return it unchanged. */
416 static IRExpr* align4if ( IRExpr* e, Bool b )
418 if (b)
419 return binop(Iop_And32, e, mkU32(~3));
420 else
421 return e;
/*------------------------------------------------------------*/
/*--- Helpers for accessing guest registers.               ---*/
/*------------------------------------------------------------*/

/* Byte offsets of the guest general-purpose registers within
   VexGuestARMState.  The r15 slot is named R15T because it also
   carries the Thumb bit in the guest state. */
#define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
#define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
#define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
#define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
#define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
#define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
#define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
#define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
#define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
#define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
#define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
#define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
#define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
#define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
#define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
#define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)

/* Condition-code thunk fields, and the no-redirect return address. */
#define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
#define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
#define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
#define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
#define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)

/* VFP/Neon double registers D0 .. D31. */
#define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
#define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
#define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
#define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
#define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
#define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
#define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
#define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
#define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
#define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
#define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
#define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
#define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
#define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
#define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
#define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
#define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
#define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
#define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
#define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
#define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
#define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
#define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
#define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
#define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
#define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
#define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
#define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
#define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
#define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
#define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
#define OFFB_D31      offsetof(VexGuestARMState,guest_D31)

/* Miscellaneous status and control fields. */
#define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
#define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
#define OFFB_TPIDRURW offsetof(VexGuestARMState,guest_TPIDRURW)
#define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
#define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
#define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
#define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
#define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
#define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)

/* Cache-maintenance start/length pseudo-registers. */
#define OFFB_CMSTART  offsetof(VexGuestARMState,guest_CMSTART)
#define OFFB_CMLEN    offsetof(VexGuestARMState,guest_CMLEN)
499 /* ---------------- Integer registers ---------------- */
501 static Int integerGuestRegOffset ( UInt iregNo )
503 /* Do we care about endianness here? We do if sub-parts of integer
504 registers are accessed, but I don't think that ever happens on
505 ARM. */
506 switch (iregNo) {
507 case 0: return OFFB_R0;
508 case 1: return OFFB_R1;
509 case 2: return OFFB_R2;
510 case 3: return OFFB_R3;
511 case 4: return OFFB_R4;
512 case 5: return OFFB_R5;
513 case 6: return OFFB_R6;
514 case 7: return OFFB_R7;
515 case 8: return OFFB_R8;
516 case 9: return OFFB_R9;
517 case 10: return OFFB_R10;
518 case 11: return OFFB_R11;
519 case 12: return OFFB_R12;
520 case 13: return OFFB_R13;
521 case 14: return OFFB_R14;
522 case 15: return OFFB_R15T;
523 default: vassert(0);
527 /* Plain ("low level") read from a reg; no +8 offset magic for r15. */
528 static IRExpr* llGetIReg ( UInt iregNo )
530 vassert(iregNo < 16);
531 return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
534 /* Architected read from a reg in ARM mode. This automagically adds 8
535 to all reads of r15. */
536 static IRExpr* getIRegA ( UInt iregNo )
538 IRExpr* e;
539 ASSERT_IS_ARM;
540 vassert(iregNo < 16);
541 if (iregNo == 15) {
542 /* If asked for r15, don't read the guest state value, as that
543 may not be up to date in the case where loop unrolling has
544 happened, because the first insn's write to the block is
545 omitted; hence in the 2nd and subsequent unrollings we don't
546 have a correct value in guest r15. Instead produce the
547 constant that we know would be produced at this point. */
548 vassert(0 == (guest_R15_curr_instr_notENC & 3));
549 e = mkU32(guest_R15_curr_instr_notENC + 8);
550 } else {
551 e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
553 return e;
556 /* Architected read from a reg in Thumb mode. This automagically adds
557 4 to all reads of r15. */
558 static IRExpr* getIRegT ( UInt iregNo )
560 IRExpr* e;
561 ASSERT_IS_THUMB;
562 vassert(iregNo < 16);
563 if (iregNo == 15) {
564 /* Ditto comment in getIReg. */
565 vassert(0 == (guest_R15_curr_instr_notENC & 1));
566 e = mkU32(guest_R15_curr_instr_notENC + 4);
567 } else {
568 e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
570 return e;
573 /* Plain ("low level") write to a reg; no jump or alignment magic for
574 r15. */
575 static void llPutIReg ( UInt iregNo, IRExpr* e )
577 vassert(iregNo < 16);
578 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
579 stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
582 /* Architected write to an integer register in ARM mode. If it is to
583 r15, record info so at the end of this insn's translation, a branch
584 to it can be made. Also handles conditional writes to the
585 register: if guardT == IRTemp_INVALID then the write is
586 unconditional. If writing r15, also 4-align it. */
587 static void putIRegA ( UInt iregNo,
588 IRExpr* e,
589 IRTemp guardT /* :: Ity_I32, 0 or 1 */,
590 IRJumpKind jk /* if a jump is generated */ )
592 /* if writing r15, force e to be 4-aligned. */
593 // INTERWORKING FIXME. this needs to be relaxed so that
594 // puts caused by LDMxx which load r15 interwork right.
595 // but is no aligned too relaxed?
596 //if (iregNo == 15)
597 // e = binop(Iop_And32, e, mkU32(~3));
598 ASSERT_IS_ARM;
599 /* So, generate either an unconditional or a conditional write to
600 the reg. */
601 if (guardT == IRTemp_INVALID) {
602 /* unconditional write */
603 llPutIReg( iregNo, e );
604 } else {
605 llPutIReg( iregNo,
606 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
607 e, llGetIReg(iregNo) ));
609 if (iregNo == 15) {
610 // assert against competing r15 updates. Shouldn't
611 // happen; should be ruled out by the instr matching
612 // logic.
613 vassert(r15written == False);
614 vassert(r15guard == IRTemp_INVALID);
615 vassert(r15kind == Ijk_Boring);
616 r15written = True;
617 r15guard = guardT;
618 r15kind = jk;
623 /* Architected write to an integer register in Thumb mode. Writes to
624 r15 are not allowed. Handles conditional writes to the register:
625 if guardT == IRTemp_INVALID then the write is unconditional. */
626 static void putIRegT ( UInt iregNo,
627 IRExpr* e,
628 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
630 /* So, generate either an unconditional or a conditional write to
631 the reg. */
632 ASSERT_IS_THUMB;
633 vassert(iregNo >= 0 && iregNo <= 14);
634 if (guardT == IRTemp_INVALID) {
635 /* unconditional write */
636 llPutIReg( iregNo, e );
637 } else {
638 llPutIReg( iregNo,
639 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
640 e, llGetIReg(iregNo) ));
/* Thumb16 and Thumb32 only.
   Returns true if reg is 13 or 15.  Implements the BadReg
   predicate in the ARM ARM. */
static Bool isBadRegT ( UInt r )
{
   vassert(r <= 15);
   ASSERT_IS_THUMB;
   if (r == 13 || r == 15)
      return True;
   return False;
}
656 /* ---------------- Double registers ---------------- */
658 static Int doubleGuestRegOffset ( UInt dregNo )
660 /* Do we care about endianness here? Probably do if we ever get
661 into the situation of dealing with the single-precision VFP
662 registers. */
663 switch (dregNo) {
664 case 0: return OFFB_D0;
665 case 1: return OFFB_D1;
666 case 2: return OFFB_D2;
667 case 3: return OFFB_D3;
668 case 4: return OFFB_D4;
669 case 5: return OFFB_D5;
670 case 6: return OFFB_D6;
671 case 7: return OFFB_D7;
672 case 8: return OFFB_D8;
673 case 9: return OFFB_D9;
674 case 10: return OFFB_D10;
675 case 11: return OFFB_D11;
676 case 12: return OFFB_D12;
677 case 13: return OFFB_D13;
678 case 14: return OFFB_D14;
679 case 15: return OFFB_D15;
680 case 16: return OFFB_D16;
681 case 17: return OFFB_D17;
682 case 18: return OFFB_D18;
683 case 19: return OFFB_D19;
684 case 20: return OFFB_D20;
685 case 21: return OFFB_D21;
686 case 22: return OFFB_D22;
687 case 23: return OFFB_D23;
688 case 24: return OFFB_D24;
689 case 25: return OFFB_D25;
690 case 26: return OFFB_D26;
691 case 27: return OFFB_D27;
692 case 28: return OFFB_D28;
693 case 29: return OFFB_D29;
694 case 30: return OFFB_D30;
695 case 31: return OFFB_D31;
696 default: vassert(0);
700 /* Plain ("low level") read from a VFP Dreg. */
701 static IRExpr* llGetDReg ( UInt dregNo )
703 vassert(dregNo < 32);
704 return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
707 /* Architected read from a VFP Dreg. */
708 static IRExpr* getDReg ( UInt dregNo ) {
709 return llGetDReg( dregNo );
712 /* Plain ("low level") write to a VFP Dreg. */
713 static void llPutDReg ( UInt dregNo, IRExpr* e )
715 vassert(dregNo < 32);
716 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
717 stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
720 /* Architected write to a VFP Dreg. Handles conditional writes to the
721 register: if guardT == IRTemp_INVALID then the write is
722 unconditional. */
723 static void putDReg ( UInt dregNo,
724 IRExpr* e,
725 IRTemp guardT /* :: Ity_I32, 0 or 1 */)
727 /* So, generate either an unconditional or a conditional write to
728 the reg. */
729 if (guardT == IRTemp_INVALID) {
730 /* unconditional write */
731 llPutDReg( dregNo, e );
732 } else {
733 llPutDReg( dregNo,
734 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
735 e, llGetDReg(dregNo) ));
739 /* And now exactly the same stuff all over again, but this time
740 taking/returning I64 rather than F64, to support 64-bit Neon
741 ops. */
743 /* Plain ("low level") read from a Neon Integer Dreg. */
744 static IRExpr* llGetDRegI64 ( UInt dregNo )
746 vassert(dregNo < 32);
747 return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
750 /* Architected read from a Neon Integer Dreg. */
751 static IRExpr* getDRegI64 ( UInt dregNo ) {
752 return llGetDRegI64( dregNo );
755 /* Plain ("low level") write to a Neon Integer Dreg. */
756 static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
758 vassert(dregNo < 32);
759 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
760 stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
763 /* Architected write to a Neon Integer Dreg. Handles conditional
764 writes to the register: if guardT == IRTemp_INVALID then the write
765 is unconditional. */
766 static void putDRegI64 ( UInt dregNo,
767 IRExpr* e,
768 IRTemp guardT /* :: Ity_I32, 0 or 1 */)
770 /* So, generate either an unconditional or a conditional write to
771 the reg. */
772 if (guardT == IRTemp_INVALID) {
773 /* unconditional write */
774 llPutDRegI64( dregNo, e );
775 } else {
776 llPutDRegI64( dregNo,
777 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
778 e, llGetDRegI64(dregNo) ));
782 /* ---------------- Quad registers ---------------- */
784 static Int quadGuestRegOffset ( UInt qregNo )
786 /* Do we care about endianness here? Probably do if we ever get
787 into the situation of dealing with the 64 bit Neon registers. */
788 switch (qregNo) {
789 case 0: return OFFB_D0;
790 case 1: return OFFB_D2;
791 case 2: return OFFB_D4;
792 case 3: return OFFB_D6;
793 case 4: return OFFB_D8;
794 case 5: return OFFB_D10;
795 case 6: return OFFB_D12;
796 case 7: return OFFB_D14;
797 case 8: return OFFB_D16;
798 case 9: return OFFB_D18;
799 case 10: return OFFB_D20;
800 case 11: return OFFB_D22;
801 case 12: return OFFB_D24;
802 case 13: return OFFB_D26;
803 case 14: return OFFB_D28;
804 case 15: return OFFB_D30;
805 default: vassert(0);
809 /* Plain ("low level") read from a Neon Qreg. */
810 static IRExpr* llGetQReg ( UInt qregNo )
812 vassert(qregNo < 16);
813 return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
816 /* Architected read from a Neon Qreg. */
817 static IRExpr* getQReg ( UInt qregNo ) {
818 return llGetQReg( qregNo );
821 /* Plain ("low level") write to a Neon Qreg. */
822 static void llPutQReg ( UInt qregNo, IRExpr* e )
824 vassert(qregNo < 16);
825 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
826 stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
829 /* Architected write to a Neon Qreg. Handles conditional writes to the
830 register: if guardT == IRTemp_INVALID then the write is
831 unconditional. */
832 static void putQReg ( UInt qregNo,
833 IRExpr* e,
834 IRTemp guardT /* :: Ity_I32, 0 or 1 */)
836 /* So, generate either an unconditional or a conditional write to
837 the reg. */
838 if (guardT == IRTemp_INVALID) {
839 /* unconditional write */
840 llPutQReg( qregNo, e );
841 } else {
842 llPutQReg( qregNo,
843 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
844 e, llGetQReg(qregNo) ));
849 /* ---------------- Float registers ---------------- */
851 static Int floatGuestRegOffset ( UInt fregNo )
853 /* Start with the offset of the containing double, and then correct
854 for endianness. Actually this is completely bogus and needs
855 careful thought. */
856 Int off;
857 /* NB! Limit is 64, not 32, because we might be pulling F32 bits
858 out of SIMD registers, and there are 16 SIMD registers each of
859 128 bits (4 x F32). */
860 vassert(fregNo < 64);
861 off = doubleGuestRegOffset(fregNo >> 1);
862 if (host_endness == VexEndnessLE) {
863 if (fregNo & 1)
864 off += 4;
865 } else {
866 vassert(0);
868 return off;
871 /* Plain ("low level") read from a VFP Freg. */
872 static IRExpr* llGetFReg ( UInt fregNo )
874 vassert(fregNo < 32);
875 return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
878 static IRExpr* llGetFReg_up_to_64 ( UInt fregNo )
880 vassert(fregNo < 64);
881 return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
884 /* Architected read from a VFP Freg. */
885 static IRExpr* getFReg ( UInt fregNo ) {
886 return llGetFReg( fregNo );
889 /* Plain ("low level") write to a VFP Freg. */
890 static void llPutFReg ( UInt fregNo, IRExpr* e )
892 vassert(fregNo < 32);
893 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
894 stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
897 static void llPutFReg_up_to_64 ( UInt fregNo, IRExpr* e )
899 vassert(fregNo < 64);
900 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
901 stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
904 /* Architected write to a VFP Freg. Handles conditional writes to the
905 register: if guardT == IRTemp_INVALID then the write is
906 unconditional. */
907 static void putFReg ( UInt fregNo,
908 IRExpr* e,
909 IRTemp guardT /* :: Ity_I32, 0 or 1 */)
911 /* So, generate either an unconditional or a conditional write to
912 the reg. */
913 if (guardT == IRTemp_INVALID) {
914 /* unconditional write */
915 llPutFReg( fregNo, e );
916 } else {
917 llPutFReg( fregNo,
918 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
919 e, llGetFReg(fregNo) ));
924 /* ---------------- Misc registers ---------------- */
926 static void putMiscReg32 ( UInt gsoffset,
927 IRExpr* e, /* :: Ity_I32 */
928 IRTemp guardT /* :: Ity_I32, 0 or 1 */)
930 switch (gsoffset) {
931 case OFFB_FPSCR: break;
932 case OFFB_QFLAG32: break;
933 case OFFB_GEFLAG0: break;
934 case OFFB_GEFLAG1: break;
935 case OFFB_GEFLAG2: break;
936 case OFFB_GEFLAG3: break;
937 case OFFB_TPIDRURW: break;
938 default: vassert(0); /* awaiting more cases */
940 vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
942 if (guardT == IRTemp_INVALID) {
943 /* unconditional write */
944 stmt(IRStmt_Put(gsoffset, e));
945 } else {
946 stmt(IRStmt_Put(
947 gsoffset,
948 IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
949 e, IRExpr_Get(gsoffset, Ity_I32) )
954 static IRTemp get_ITSTATE ( void )
956 ASSERT_IS_THUMB;
957 IRTemp t = newTemp(Ity_I32);
958 assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
959 return t;
962 static void put_ITSTATE ( IRTemp t )
964 ASSERT_IS_THUMB;
965 stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
968 static IRTemp get_QFLAG32 ( void )
970 IRTemp t = newTemp(Ity_I32);
971 assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
972 return t;
975 static void put_QFLAG32 ( IRTemp t, IRTemp condT )
977 putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
980 /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
981 Status Register) to indicate that overflow or saturation occurred.
982 Nb: t must be zero to denote no saturation, and any nonzero
983 value to indicate saturation. */
984 static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
986 IRTemp old = get_QFLAG32();
987 IRTemp nyu = newTemp(Ity_I32);
988 assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
989 put_QFLAG32(nyu, condT);
992 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
993 flagNo: which flag bit to set [3...0]
994 lowbits_to_ignore: 0 = look at all 32 bits
995 8 = look at top 24 bits only
996 16 = look at top 16 bits only
997 31 = look at the top bit only
998 e: input value to be evaluated.
999 The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
1000 masked out. If the resulting value is zero then the GE flag is
1001 set to 0; any other value sets the flag to 1. */
1002 static void put_GEFLAG32 ( Int flagNo, /* 0, 1, 2 or 3 */
1003 Int lowbits_to_ignore, /* 0, 8, 16 or 31 */
1004 IRExpr* e, /* Ity_I32 */
1005 IRTemp condT )
1007 vassert( flagNo >= 0 && flagNo <= 3 );
1008 vassert( lowbits_to_ignore == 0 ||
1009 lowbits_to_ignore == 8 ||
1010 lowbits_to_ignore == 16 ||
1011 lowbits_to_ignore == 31 );
1012 IRTemp masked = newTemp(Ity_I32);
1013 assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
1015 switch (flagNo) {
1016 case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
1017 case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
1018 case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
1019 case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
1020 default: vassert(0);
1024 /* Return the (32-bit, zero-or-nonzero representation scheme) of
1025 the specified GE flag. */
1026 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
1028 switch (flagNo) {
1029 case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
1030 case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
1031 case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
1032 case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
1033 default: vassert(0);
1037 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
1038 2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
1039 15 of the value. All other bits are ignored. */
1040 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
1042 IRTemp ge10 = newTemp(Ity_I32);
1043 IRTemp ge32 = newTemp(Ity_I32);
1044 assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1045 assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1046 put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
1047 put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
1048 put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
1049 put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
1053 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3
1054 from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
1055 bit 7. All other bits are ignored. */
1056 static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
1058 IRTemp ge0 = newTemp(Ity_I32);
1059 IRTemp ge1 = newTemp(Ity_I32);
1060 IRTemp ge2 = newTemp(Ity_I32);
1061 IRTemp ge3 = newTemp(Ity_I32);
1062 assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
1063 assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1064 assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
1065 assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1066 put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
1067 put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
1068 put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
1069 put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
1073 /* ---------------- FPSCR stuff ---------------- */
1075 /* Generate IR to get hold of the rounding mode bits in FPSCR, and
1076 convert them to IR format. Bind the final result to the
1077 returned temp. */
1078 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1080 /* The ARMvfp encoding for rounding mode bits is:
1081 00 to nearest
1082 01 to +infinity
1083 10 to -infinity
1084 11 to zero
1085 We need to convert that to the IR encoding:
1086 00 to nearest (the default)
1087 10 to +infinity
1088 01 to -infinity
1089 11 to zero
1090 Which can be done by swapping bits 0 and 1.
1091 The rmode bits are at 23:22 in FPSCR.
1093 IRTemp armEncd = newTemp(Ity_I32);
1094 IRTemp swapped = newTemp(Ity_I32);
1095 /* Fish FPSCR[23:22] out, and slide to bottom. Doesn't matter that
1096 we don't zero out bits 24 and above, since the assignment to
1097 'swapped' will mask them out anyway. */
1098 assign(armEncd,
1099 binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
1100 /* Now swap them. */
1101 assign(swapped,
1102 binop(Iop_Or32,
1103 binop(Iop_And32,
1104 binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1105 mkU32(2)),
1106 binop(Iop_And32,
1107 binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1108 mkU32(1))
1110 return swapped;
1114 /*------------------------------------------------------------*/
1115 /*--- Helpers for flag handling and conditional insns ---*/
1116 /*------------------------------------------------------------*/
1118 static const HChar* name_ARMCondcode ( ARMCondcode cond )
1120 switch (cond) {
1121 case ARMCondEQ: return "{eq}";
1122 case ARMCondNE: return "{ne}";
1123 case ARMCondHS: return "{hs}"; // or 'cs'
1124 case ARMCondLO: return "{lo}"; // or 'cc'
1125 case ARMCondMI: return "{mi}";
1126 case ARMCondPL: return "{pl}";
1127 case ARMCondVS: return "{vs}";
1128 case ARMCondVC: return "{vc}";
1129 case ARMCondHI: return "{hi}";
1130 case ARMCondLS: return "{ls}";
1131 case ARMCondGE: return "{ge}";
1132 case ARMCondLT: return "{lt}";
1133 case ARMCondGT: return "{gt}";
1134 case ARMCondLE: return "{le}";
1135 case ARMCondAL: return ""; // {al}: is the default
1136 case ARMCondNV: return "{nv}";
1137 default: vpanic("name_ARMCondcode");
1140 /* Handy shorthand for name_ARMCondcode. */
1141 static const HChar* nCC ( ARMCondcode cond ) {
1142 return name_ARMCondcode(cond);
1146 /* Build IR to calculate some particular condition from stored
1147 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1148 Ity_I32, suitable for narrowing. Although the return type is
1149 Ity_I32, the returned value is either 0 or 1. 'cond' must be
1150 :: Ity_I32 and must denote the condition to compute in
1151 bits 7:4, and be zero everywhere else.
1153 static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
1155 vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
1156 /* And 'cond' had better produce a value in which only bits 7:4 are
1157 nonzero. However, obviously we can't assert for that. */
1159 /* So what we're constructing for the first argument is
1160 "(cond << 4) | stored-operation".
1161 However, as per comments above, 'cond' must be supplied
1162 pre-shifted to this function.
1164 This pairing scheme requires that the ARM_CC_OP_ values all fit
1165 in 4 bits. Hence we are passing a (COND, OP) pair in the lowest
1166 8 bits of the first argument. */
1167 IRExpr** args
1168 = mkIRExprVec_4(
1169 binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
1170 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1171 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1172 IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
1174 IRExpr* call
1175 = mkIRExprCCall(
1176 Ity_I32,
1177 0/*regparm*/,
1178 "armg_calculate_condition", &armg_calculate_condition,
1179 args
1182 /* Exclude the requested condition, OP and NDEP from definedness
1183 checking. We're only interested in DEP1 and DEP2. */
1184 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1185 return call;
1189 /* Build IR to calculate some particular condition from stored
1190 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression of type
1191 Ity_I32, suitable for narrowing. Although the return type is
1192 Ity_I32, the returned value is either 0 or 1.
1194 static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
1196 /* First arg is "(cond << 4) | stored-operation".  This requires that the
1197 ARM_CC_OP_ values all fit in 4 bits. Hence we are passing a
1198 (COND, OP) pair in the lowest 8 bits of the first argument. */
1199 vassert(cond >= 0 && cond <= 15);
1200 return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
1204 /* Build IR to calculate just the carry flag from stored
1205 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1206 Ity_I32. */
1207 static IRExpr* mk_armg_calculate_flag_c ( void )
1209 IRExpr** args
1210 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1211 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1212 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1213 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1214 IRExpr* call
1215 = mkIRExprCCall(
1216 Ity_I32,
1217 0/*regparm*/,
1218 "armg_calculate_flag_c", &armg_calculate_flag_c,
1219 args
1221 /* Exclude OP and NDEP from definedness checking. We're only
1222 interested in DEP1 and DEP2. */
1223 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1224 return call;
1228 /* Build IR to calculate just the overflow flag from stored
1229 CC_OP/CC_DEP1/CC_DEP2/CC_NDEP. Returns an expression ::
1230 Ity_I32. */
1231 static IRExpr* mk_armg_calculate_flag_v ( void )
1233 IRExpr** args
1234 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1235 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1236 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1237 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1238 IRExpr* call
1239 = mkIRExprCCall(
1240 Ity_I32,
1241 0/*regparm*/,
1242 "armg_calculate_flag_v", &armg_calculate_flag_v,
1243 args
1245 /* Exclude OP and NDEP from definedness checking. We're only
1246 interested in DEP1 and DEP2. */
1247 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1248 return call;
1252 /* Build IR to calculate N Z C V in bits 31:28 of the
1253 returned word. */
1254 static IRExpr* mk_armg_calculate_flags_nzcv ( void )
1256 IRExpr** args
1257 = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP, Ity_I32),
1258 IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1259 IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1260 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1261 IRExpr* call
1262 = mkIRExprCCall(
1263 Ity_I32,
1264 0/*regparm*/,
1265 "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
1266 args
1268 /* Exclude OP and NDEP from definedness checking. We're only
1269 interested in DEP1 and DEP2. */
1270 call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1271 return call;
1274 static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
1276 IRExpr** args1;
1277 IRExpr** args2;
1278 IRExpr *call1, *call2, *res;
1280 if (Q) {
1281 args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
1282 binop(Iop_GetElem32x4, resL, mkU8(1)),
1283 binop(Iop_GetElem32x4, resR, mkU8(0)),
1284 binop(Iop_GetElem32x4, resR, mkU8(1)) );
1285 args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
1286 binop(Iop_GetElem32x4, resL, mkU8(3)),
1287 binop(Iop_GetElem32x4, resR, mkU8(2)),
1288 binop(Iop_GetElem32x4, resR, mkU8(3)) );
1289 } else {
1290 args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
1291 binop(Iop_GetElem32x2, resL, mkU8(1)),
1292 binop(Iop_GetElem32x2, resR, mkU8(0)),
1293 binop(Iop_GetElem32x2, resR, mkU8(1)) );
1296 call1 = mkIRExprCCall(
1297 Ity_I32,
1298 0/*regparm*/,
1299 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1300 args1
1302 if (Q) {
1303 call2 = mkIRExprCCall(
1304 Ity_I32,
1305 0/*regparm*/,
1306 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1307 args2
1310 if (Q) {
1311 res = binop(Iop_Or32, call1, call2);
1312 } else {
1313 res = call1;
1315 return res;
1318 // FIXME: this is named wrongly .. looks like a sticky set of
1319 // QC, not a write to it.
1320 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
1321 IRTemp condT )
1323 putMiscReg32 (OFFB_FPSCR,
1324 binop(Iop_Or32,
1325 IRExpr_Get(OFFB_FPSCR, Ity_I32),
1326 binop(Iop_Shl32,
1327 mk_armg_calculate_flag_qc(resL, resR, Q),
1328 mkU8(27))),
1329 condT);
1332 /* Build IR to conditionally set the flags thunk. As with putIReg, if
1333 guard is IRTemp_INVALID then it's unconditional, else it holds a
1334 condition :: Ity_I32. */
1335 static
1336 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
1337 IRTemp t_dep2, IRTemp t_ndep,
1338 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1340 vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
1341 vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
1342 vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
1343 vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
1344 if (guardT == IRTemp_INVALID) {
1345 /* unconditional */
1346 stmt( IRStmt_Put( OFFB_CC_OP, mkU32(cc_op) ));
1347 stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1348 stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1349 stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1350 } else {
1351 /* conditional */
1352 IRTemp c1 = newTemp(Ity_I1);
1353 assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
1354 stmt( IRStmt_Put(
1355 OFFB_CC_OP,
1356 IRExpr_ITE( mkexpr(c1),
1357 mkU32(cc_op),
1358 IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
1359 stmt( IRStmt_Put(
1360 OFFB_CC_DEP1,
1361 IRExpr_ITE( mkexpr(c1),
1362 mkexpr(t_dep1),
1363 IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
1364 stmt( IRStmt_Put(
1365 OFFB_CC_DEP2,
1366 IRExpr_ITE( mkexpr(c1),
1367 mkexpr(t_dep2),
1368 IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
1369 stmt( IRStmt_Put(
1370 OFFB_CC_NDEP,
1371 IRExpr_ITE( mkexpr(c1),
1372 mkexpr(t_ndep),
1373 IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
1378 /* Minor variant of the above that sets NDEP to zero (if it
1379 sets it at all) */
1380 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1381 IRTemp t_dep2,
1382 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1384 IRTemp z32 = newTemp(Ity_I32);
1385 assign( z32, mkU32(0) );
1386 setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1390 /* Minor variant of the above that sets DEP2 to zero (if it
1391 sets it at all) */
1392 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1393 IRTemp t_ndep,
1394 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1396 IRTemp z32 = newTemp(Ity_I32);
1397 assign( z32, mkU32(0) );
1398 setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1402 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1403 sets them at all) */
1404 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1405 IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1407 IRTemp z32 = newTemp(Ity_I32);
1408 assign( z32, mkU32(0) );
1409 setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1413 /* ARM only */
1414 /* Generate a side-exit to the next instruction, if the given guard
1415 expression :: Ity_I32 is 0 (note! the side exit is taken if the
1416 condition is false!) This is used to skip over conditional
1417 instructions which we can't generate straight-line code for, either
1418 because they are too complex or (more likely) they potentially
1419 generate exceptions.
1421 static void mk_skip_over_A32_if_cond_is_false (
1422 IRTemp guardT /* :: Ity_I32, 0 or 1 */
1425 ASSERT_IS_ARM;
1426 vassert(guardT != IRTemp_INVALID);
1427 vassert(0 == (guest_R15_curr_instr_notENC & 3));
1428 stmt( IRStmt_Exit(
1429 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1430 Ijk_Boring,
1431 IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
1432 OFFB_R15T
1436 /* Thumb16 only */
1437 /* ditto, but jump over a 16-bit thumb insn */
1438 static void mk_skip_over_T16_if_cond_is_false (
1439 IRTemp guardT /* :: Ity_I32, 0 or 1 */
1442 ASSERT_IS_THUMB;
1443 vassert(guardT != IRTemp_INVALID);
1444 vassert(0 == (guest_R15_curr_instr_notENC & 1));
1445 stmt( IRStmt_Exit(
1446 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1447 Ijk_Boring,
1448 IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
1449 OFFB_R15T
1454 /* Thumb32 only */
1455 /* ditto, but jump over a 32-bit thumb insn */
1456 static void mk_skip_over_T32_if_cond_is_false (
1457 IRTemp guardT /* :: Ity_I32, 0 or 1 */
1460 ASSERT_IS_THUMB;
1461 vassert(guardT != IRTemp_INVALID);
1462 vassert(0 == (guest_R15_curr_instr_notENC & 1));
1463 stmt( IRStmt_Exit(
1464 unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1465 Ijk_Boring,
1466 IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
1467 OFFB_R15T
1472 /* Thumb16 and Thumb32 only
1473 Generate a SIGILL followed by a restart of the current instruction
1474 if the given temp is nonzero. */
1475 static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
1477 ASSERT_IS_THUMB;
1478 vassert(t != IRTemp_INVALID);
1479 vassert(0 == (guest_R15_curr_instr_notENC & 1));
1480 stmt(
1481 IRStmt_Exit(
1482 binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
1483 Ijk_NoDecode,
1484 IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
1485 OFFB_R15T
1491 /* Inspect the old_itstate, and generate a SIGILL if it indicates that
1492 we are currently in an IT block and are not the last in the block.
1493 This also rolls back guest_ITSTATE to its old value before the exit
1494 and restores it to its new value afterwards. This is so that if
1495 the exit is taken, we have an up to date version of ITSTATE
1496 available. Without doing that, we have no hope of making precise
1497 exceptions work. */
1498 static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
1499 IRTemp old_itstate /* :: Ity_I32 */,
1500 IRTemp new_itstate /* :: Ity_I32 */
1503 ASSERT_IS_THUMB;
1504 put_ITSTATE(old_itstate); // backout
1505 IRTemp guards_for_next3 = newTemp(Ity_I32);
1506 assign(guards_for_next3,
1507 binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
1508 gen_SIGILL_T_if_nonzero(guards_for_next3);
1509 put_ITSTATE(new_itstate); //restore
1513 /* Simpler version of the above, which generates a SIGILL if
1514 we're anywhere within an IT block. */
1515 static void gen_SIGILL_T_if_in_ITBlock (
1516 IRTemp old_itstate /* :: Ity_I32 */,
1517 IRTemp new_itstate /* :: Ity_I32 */
   /* Any nonzero old_itstate means we are somewhere inside an IT
      block; ITSTATE is rolled back around the conditional exit so a
      taken exit sees the pre-insn value. */
1520 put_ITSTATE(old_itstate); // backout
1521 gen_SIGILL_T_if_nonzero(old_itstate);
1522 put_ITSTATE(new_itstate); //restore
1526 /* Generate an APSR value, from the NZCV thunk, and
1527 from QFLAG32 and GEFLAG0 .. GEFLAG3. */
1528 static IRTemp synthesise_APSR ( void )
   /* Assemble an APSR image: NZCV in bits 31:28 (from the flags
      thunk), Q at bit ARMG_CC_SHIFT_Q, GE3..0 at bits 19:16. */
1530 IRTemp res1 = newTemp(Ity_I32);
1531 // Get NZCV
1532 assign( res1, mk_armg_calculate_flags_nzcv() );
1533 // OR in the Q value
   // Q is stored zero/nonzero; normalise to 0/1 before shifting
   // into position.
1534 IRTemp res2 = newTemp(Ity_I32);
1535 assign(
1536 res2,
1537 binop(Iop_Or32,
1538 mkexpr(res1),
1539 binop(Iop_Shl32,
1540 unop(Iop_1Uto32,
1541 binop(Iop_CmpNE32,
1542 mkexpr(get_QFLAG32()),
1543 mkU32(0))),
1544 mkU8(ARMG_CC_SHIFT_Q)))
1546 // OR in GE0 .. GE3
   // Each GE flag is stored zero/nonzero; normalise to 0/1 and
   // place at bits 16..19 respectively.
1547 IRExpr* ge0
1548 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
1549 IRExpr* ge1
1550 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
1551 IRExpr* ge2
1552 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
1553 IRExpr* ge3
1554 = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
1555 IRTemp res3 = newTemp(Ity_I32);
1556 assign(res3,
1557 binop(Iop_Or32,
1558 mkexpr(res2),
1559 binop(Iop_Or32,
1560 binop(Iop_Or32,
1561 binop(Iop_Shl32, ge0, mkU8(16)),
1562 binop(Iop_Shl32, ge1, mkU8(17))),
1563 binop(Iop_Or32,
1564 binop(Iop_Shl32, ge2, mkU8(18)),
1565 binop(Iop_Shl32, ge3, mkU8(19))) )));
1566 return res3;
1570 /* and the inverse transformation: given an APSR value,
1571 set the NZCV thunk, the Q flag, and the GE flags. */
1572 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
1573 IRTemp apsrT, IRTemp condT )
1575 vassert(write_nzcvq || write_ge);
1576 if (write_nzcvq) {
1577 // Do NZCV
   // Copy bits 31:28 straight into the thunk via the COPY op.
1578 IRTemp immT = newTemp(Ity_I32);
1579 assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
1580 setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
1581 // Do Q
   // Sticky Q is stored zero/nonzero, so the masked bit suffices.
1582 IRTemp qnewT = newTemp(Ity_I32);
1583 assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
1584 put_QFLAG32(qnewT, condT);
1586 if (write_ge) {
1587 // Do GE3..0
   // GE flags are stored zero/nonzero, so the masked APSR bits
   // 16..19 can be stored directly.
1588 put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
1589 condT);
1590 put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
1591 condT);
1592 put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
1593 condT);
1594 put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
1595 condT);
1600 /*------------------------------------------------------------*/
1601 /*--- Helpers for saturation ---*/
1602 /*------------------------------------------------------------*/
1604 /* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
1605 (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
1606 (b) the floor is computed from the value of imm5. these two fnsn
1607 should be commoned up. */
1609 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
1610 Optionally return flag resQ saying whether saturation occurred.
1611 See definition in manual, section A2.2.1, page 41
1612 (bits(N), boolean) UnsignedSatQ( integer i, integer N )
1614 if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
1615 elsif ( i < 0 ) { result = 0; saturated = TRUE; }
1616 else { result = i; saturated = FALSE; }
1617 return ( result<N-1:0>, saturated );
1620 static void armUnsignedSatQ( IRTemp* res, /* OUT - Ity_I32 */
1621 IRTemp* resQ, /* OUT - Ity_I32 */
1622 IRTemp regT, /* value to clamp - Ity_I32 */
1623 UInt imm5 ) /* saturation ceiling */
   /* Clamp the value in regT to [0 .. (2^imm5)-1] using two
      signed-compare/ITE steps: min with the ceiling, then max with
      the floor (0).  NOTE(review): assumes imm5 <= 31 so the shift
      below is defined -- confirm at call sites. */
1625 ULong ceil64 = (1ULL << imm5) - 1; // (2^imm5)-1
1626 UInt ceil = (UInt)ceil64;
1627 UInt floor = 0;
1629 IRTemp nd0 = newTemp(Ity_I32);
1630 IRTemp nd1 = newTemp(Ity_I32);
1631 IRTemp nd2 = newTemp(Ity_I1);
1632 IRTemp nd3 = newTemp(Ity_I32);
1633 IRTemp nd4 = newTemp(Ity_I32);
1634 IRTemp nd5 = newTemp(Ity_I1);
1635 IRTemp nd6 = newTemp(Ity_I32);
1637 assign( nd0, mkexpr(regT) );
1638 assign( nd1, mkU32(ceil) );
   // nd3 = min(regT, ceil), signed compare
1639 assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1640 assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
1641 assign( nd4, mkU32(floor) );
   // nd6 = max(nd3, floor)
1642 assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1643 assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
1644 assign( *res, mkexpr(nd6) );
1646 /* if saturation occurred, then resQ is set to some nonzero value
1647 if sat did not occur, resQ is guaranteed to be zero. */
   // (clamped != original) <=> saturation; XOR captures that.
1648 if (resQ) {
1649 assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1654 /* SignedSatQ(): 'clamp' each value so it lies between -2^N <= x <= (2^N) - 1
1655 Optionally return flag resQ saying whether saturation occurred.
1656 - see definition in manual, section A2.2.1, page 41
1657 (bits(N), boolean ) SignedSatQ( integer i, integer N )
1659 if ( i > 2^(N-1) - 1 ) { result = 2^(N-1) - 1; saturated = TRUE; }
1660 elsif ( i < -(2^(N-1)) ) { result = -(2^(N-1)); saturated = TRUE; }
1661 else { result = i; saturated = FALSE; }
1662 return ( result[N-1:0], saturated );
1665 static void armSignedSatQ( IRTemp regT, /* value to clamp - Ity_I32 */
1666 UInt imm5, /* saturation ceiling */
1667 IRTemp* res, /* OUT - Ity_I32 */
1668 IRTemp* resQ ) /* OUT - Ity_I32 */
   /* Clamp regT to [-(2^(imm5-1)) .. (2^(imm5-1))-1] using two
      signed-compare/ITE steps: min with the ceiling, then max with
      the floor.  NOTE(review): assumes 1 <= imm5 <= 32 so the
      (imm5-1) shifts below are defined -- confirm at call sites. */
1670 Long ceil64 = (1LL << (imm5-1)) - 1; // (2^(imm5-1))-1
1671 Long floor64 = -(1LL << (imm5-1)); // -(2^(imm5-1))
1672 Int ceil = (Int)ceil64;
1673 Int floor = (Int)floor64;
1675 IRTemp nd0 = newTemp(Ity_I32);
1676 IRTemp nd1 = newTemp(Ity_I32);
1677 IRTemp nd2 = newTemp(Ity_I1);
1678 IRTemp nd3 = newTemp(Ity_I32);
1679 IRTemp nd4 = newTemp(Ity_I32);
1680 IRTemp nd5 = newTemp(Ity_I1);
1681 IRTemp nd6 = newTemp(Ity_I32);
1683 assign( nd0, mkexpr(regT) );
1684 assign( nd1, mkU32(ceil) );
   // nd3 = min(regT, ceil), signed compare
1685 assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1686 assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
1687 assign( nd4, mkU32(floor) );
   // nd6 = max(nd3, floor)
1688 assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1689 assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
1690 assign( *res, mkexpr(nd6) );
1692 /* if saturation occurred, then resQ is set to some nonzero value
1693 if sat did not occur, resQ is guaranteed to be zero. */
   // (clamped != original) <=> saturation; XOR captures that.
1694 if (resQ) {
1695 assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1700 /* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
1701 overflow occurred for 32-bit addition. Needs both args and the
1702 result. HD p27. */
1703 static
1704 IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
1705 IRTemp argL, IRTemp argR )
1707 IRTemp res = newTemp(Ity_I32);
1708 assign(res, resE);
1709 return
1710 binop( Iop_Shr32,
1711 binop( Iop_And32,
1712 binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
1713 binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
1714 mkU8(31) );
1717 /* Similarly .. also from HD p27 .. */
1718 static
1719 IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
1720 IRTemp argL, IRTemp argR )
1722 IRTemp res = newTemp(Ity_I32);
1723 assign(res, resE);
1724 return
1725 binop( Iop_Shr32,
1726 binop( Iop_And32,
1727 binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
1728 binop( Iop_Xor32, mkexpr(res), mkexpr(argL) )),
1729 mkU8(31) );
1733 /*------------------------------------------------------------*/
1734 /*--- Larger helpers ---*/
1735 /*------------------------------------------------------------*/
1737 /* Compute both the result and new C flag value for a LSL by an imm5
1738 or by a register operand. May generate reads of the old C value
1739 (hence only safe to use before any writes to guest state happen).
1740 Are factored out so can be used by both ARM and Thumb.
1742 Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
1743 "res" (the result) is a.k.a. "shop", shifter operand
1744 "newC" (the new C) is a.k.a. "shco", shifter carry out
1746 The calling convention for res and newC is a bit funny. They could
1747 be passed by value, but instead are passed by ref.
1749 The C (shco) value computed must be zero in bits 31:1, as the IR
1750 optimisations for flag handling (guest_arm_spechelper) rely on
1751 that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
1752 for it. Same applies to all these functions that compute shco
1753 after a shift or rotate, not just this one.
1756 static void compute_result_and_C_after_LSL_by_imm5 (
1757 /*OUT*/HChar* buf,
1758 IRTemp* res,
1759 IRTemp* newC,
1760 IRTemp rMt, UInt shift_amt, /* operands */
1761 UInt rM /* only for debug printing */
1764 if (shift_amt == 0) {
1765 if (newC) {
1766 assign( *newC, mk_armg_calculate_flag_c() );
1768 assign( *res, mkexpr(rMt) );
1769 DIS(buf, "r%u", rM);
1770 } else {
1771 vassert(shift_amt >= 1 && shift_amt <= 31);
1772 if (newC) {
1773 assign( *newC,
1774 binop(Iop_And32,
1775 binop(Iop_Shr32, mkexpr(rMt),
1776 mkU8(32 - shift_amt)),
1777 mkU32(1)));
1779 assign( *res,
1780 binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
1781 DIS(buf, "r%u, LSL #%u", rM, shift_amt);
1786 static void compute_result_and_C_after_LSL_by_reg (
1787 /*OUT*/HChar* buf,
1788 IRTemp* res,
1789 IRTemp* newC,
1790 IRTemp rMt, IRTemp rSt, /* operands */
1791 UInt rM, UInt rS /* only for debug printing */
   /* Register-specified LSL: compute result and (optionally) shifter
      carry-out, per the pseudo-code below. */
1794 // shift left in range 0 .. 255
1795 // amt = rS & 255
1796 // res = amt < 32 ? Rm << amt : 0
1797 // newC = amt == 0 ? oldC :
1798 // amt in 1..32 ? Rm[32-amt] : 0
1799 IRTemp amtT = newTemp(Ity_I32);
1800 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1801 if (newC) {
1802 /* mux0X(amt == 0,
1803 mux0X(amt < 32,
1805 Rm[(32-amt) & 31]),
1806 oldC)
1808 /* About the best you can do is pray that iropt is able
1809 to nuke most or all of the following junk. */
1810 IRTemp oldC = newTemp(Ity_I32);
1811 assign(oldC, mk_armg_calculate_flag_c() );
   // The inner (32-amt) & 31 keeps the IR shift amount in 0..31,
   // which is the only range Iop_Shr32 defines.
1812 assign(
1813 *newC,
1814 IRExpr_ITE(
1815 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1816 mkexpr(oldC),
1817 IRExpr_ITE(
1818 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1819 binop(Iop_And32,
1820 binop(Iop_Shr32,
1821 mkexpr(rMt),
1822 unop(Iop_32to8,
1823 binop(Iop_And32,
1824 binop(Iop_Sub32,
1825 mkU32(32),
1826 mkexpr(amtT)),
1827 mkU32(31)
1831 mkU32(1)
1833 mkU32(0)
1838 // (Rm << (Rs & 31)) & (((Rs & 255) - 32) >>s 31)
1839 // Lhs of the & limits the shift to 31 bits, so as to
1840 // give known IR semantics. Rhs of the & is all 1s for
1841 // Rs <= 31 and all 0s for Rs >= 32.
1842 assign(
1843 *res,
1844 binop(
1845 Iop_And32,
1846 binop(Iop_Shl32,
1847 mkexpr(rMt),
1848 unop(Iop_32to8,
1849 binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1850 binop(Iop_Sar32,
1851 binop(Iop_Sub32,
1852 mkexpr(amtT),
1853 mkU32(32)),
1854 mkU8(31))));
1855 DIS(buf, "r%u, LSL r%u", rM, rS);
1859 static void compute_result_and_C_after_LSR_by_imm5 (
1860 /*OUT*/HChar* buf,
1861 IRTemp* res,
1862 IRTemp* newC,
1863 IRTemp rMt, UInt shift_amt, /* operands */
1864 UInt rM /* only for debug printing */
1867 if (shift_amt == 0) {
1868 // conceptually a 32-bit shift, however:
1869 // res = 0
1870 // newC = Rm[31]
1871 if (newC) {
1872 assign( *newC,
1873 binop(Iop_And32,
1874 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1875 mkU32(1)));
1877 assign( *res, mkU32(0) );
1878 DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
1879 } else {
1880 // shift in range 1..31
1881 // res = Rm >>u shift_amt
1882 // newC = Rm[shift_amt - 1]
1883 vassert(shift_amt >= 1 && shift_amt <= 31);
1884 if (newC) {
1885 assign( *newC,
1886 binop(Iop_And32,
1887 binop(Iop_Shr32, mkexpr(rMt),
1888 mkU8(shift_amt - 1)),
1889 mkU32(1)));
1891 assign( *res,
1892 binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
1893 DIS(buf, "r%u, LSR #%u", rM, shift_amt);
1898 static void compute_result_and_C_after_LSR_by_reg (
1899 /*OUT*/HChar* buf,
1900 IRTemp* res,
1901 IRTemp* newC,
1902 IRTemp rMt, IRTemp rSt, /* operands */
1903 UInt rM, UInt rS /* only for debug printing */
   /* Register-specified LSR: compute result and (optionally) shifter
      carry-out, per the pseudo-code below. */
1906 // shift right in range 0 .. 255
1907 // amt = rS & 255
1908 // res = amt < 32 ? Rm >>u amt : 0
1909 // newC = amt == 0 ? oldC :
1910 // amt in 1..32 ? Rm[amt-1] : 0
1911 IRTemp amtT = newTemp(Ity_I32);
1912 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1913 if (newC) {
1914 /* mux0X(amt == 0,
1915 mux0X(amt < 32,
1917 Rm[(amt-1) & 31]),
1918 oldC)
1920 IRTemp oldC = newTemp(Ity_I32);
1921 assign(oldC, mk_armg_calculate_flag_c() );
   // The inner (amt-1) & 31 keeps the IR shift amount in 0..31,
   // which is the only range Iop_Shr32 defines.
1922 assign(
1923 *newC,
1924 IRExpr_ITE(
1925 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1926 mkexpr(oldC),
1927 IRExpr_ITE(
1928 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1929 binop(Iop_And32,
1930 binop(Iop_Shr32,
1931 mkexpr(rMt),
1932 unop(Iop_32to8,
1933 binop(Iop_And32,
1934 binop(Iop_Sub32,
1935 mkexpr(amtT),
1936 mkU32(1)),
1937 mkU32(31)
1941 mkU32(1)
1943 mkU32(0)
1948 // (Rm >>u (Rs & 31)) & (((Rs & 255) - 32) >>s 31)
1949 // Lhs of the & limits the shift to 31 bits, so as to
1950 // give known IR semantics. Rhs of the & is all 1s for
1951 // Rs <= 31 and all 0s for Rs >= 32.
1952 assign(
1953 *res,
1954 binop(
1955 Iop_And32,
1956 binop(Iop_Shr32,
1957 mkexpr(rMt),
1958 unop(Iop_32to8,
1959 binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1960 binop(Iop_Sar32,
1961 binop(Iop_Sub32,
1962 mkexpr(amtT),
1963 mkU32(32)),
1964 mkU8(31))));
1965 DIS(buf, "r%u, LSR r%u", rM, rS);
1969 static void compute_result_and_C_after_ASR_by_imm5 (
1970 /*OUT*/HChar* buf,
1971 IRTemp* res,
1972 IRTemp* newC,
1973 IRTemp rMt, UInt shift_amt, /* operands */
1974 UInt rM /* only for debug printing */
1977 if (shift_amt == 0) {
1978 // conceptually a 32-bit shift, however:
1979 // res = Rm >>s 31
1980 // newC = Rm[31]
1981 if (newC) {
1982 assign( *newC,
1983 binop(Iop_And32,
1984 binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1985 mkU32(1)));
1987 assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
1988 DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
1989 } else {
1990 // shift in range 1..31
1991 // res = Rm >>s shift_amt
1992 // newC = Rm[shift_amt - 1]
1993 vassert(shift_amt >= 1 && shift_amt <= 31);
1994 if (newC) {
1995 assign( *newC,
1996 binop(Iop_And32,
1997 binop(Iop_Shr32, mkexpr(rMt),
1998 mkU8(shift_amt - 1)),
1999 mkU32(1)));
2001 assign( *res,
2002 binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
2003 DIS(buf, "r%u, ASR #%u", rM, shift_amt);
2008 static void compute_result_and_C_after_ASR_by_reg (
2009 /*OUT*/HChar* buf,
2010 IRTemp* res,
2011 IRTemp* newC,
2012 IRTemp rMt, IRTemp rSt, /* operands */
2013 UInt rM, UInt rS /* only for debug printing */
   /* Register-specified ASR: compute result and (optionally) shifter
      carry-out, per the pseudo-code below. */
2016 // arithmetic shift right in range 0 .. 255
2017 // amt = rS & 255
2018 // res = amt < 32 ? Rm >>s amt : Rm >>s 31
2019 // newC = amt == 0 ? oldC :
2020 // amt in 1..32 ? Rm[amt-1] : Rm[31]
2021 IRTemp amtT = newTemp(Ity_I32);
2022 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2023 if (newC) {
2024 /* mux0X(amt == 0,
2025 mux0X(amt < 32,
2026 Rm[31],
2027 Rm[(amt-1) & 31])
2028 oldC)
2030 IRTemp oldC = newTemp(Ity_I32);
2031 assign(oldC, mk_armg_calculate_flag_c() );
   // The inner (amt-1) & 31 keeps the IR shift amount in 0..31,
   // which is the only range Iop_Shr32 defines.
2032 assign(
2033 *newC,
2034 IRExpr_ITE(
2035 binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
2036 mkexpr(oldC),
2037 IRExpr_ITE(
2038 binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
2039 binop(Iop_And32,
2040 binop(Iop_Shr32,
2041 mkexpr(rMt),
2042 unop(Iop_32to8,
2043 binop(Iop_And32,
2044 binop(Iop_Sub32,
2045 mkexpr(amtT),
2046 mkU32(1)),
2047 mkU32(31)
2051 mkU32(1)
2053 binop(Iop_And32,
2054 binop(Iop_Shr32,
2055 mkexpr(rMt),
2056 mkU8(31)
2058 mkU32(1)
2064 // (Rm >>s (amt <u 32 ? amt : 31))
2065 assign(
2066 *res,
2067 binop(
2068 Iop_Sar32,
2069 mkexpr(rMt),
2070 unop(
2071 Iop_32to8,
2072 IRExpr_ITE(
2073 binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
2074 mkexpr(amtT),
2075 mkU32(31)))));
2076 DIS(buf, "r%u, ASR r%u", rM, rS);
2080 static void compute_result_and_C_after_ROR_by_reg (
2081 /*OUT*/HChar* buf,
2082 IRTemp* res,
2083 IRTemp* newC,
2084 IRTemp rMt, IRTemp rSt, /* operands */
2085 UInt rM, UInt rS /* only for debug printing */
2088 // rotate right in range 0 .. 255
2089 // amt = rS & 255
2090 // shop = Rm `ror` (amt & 31)
2091 // shco = amt == 0 ? oldC : Rm[(amt-1) & 31]
2092 IRTemp amtT = newTemp(Ity_I32);
2093 assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2094 IRTemp amt5T = newTemp(Ity_I32);
2095 assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
2096 IRTemp oldC = newTemp(Ity_I32);
2097 assign(oldC, mk_armg_calculate_flag_c() );
2098 if (newC) {
2099 assign(
2100 *newC,
2101 IRExpr_ITE(
2102 binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
2103 binop(Iop_And32,
2104 binop(Iop_Shr32,
2105 mkexpr(rMt),
2106 unop(Iop_32to8,
2107 binop(Iop_And32,
2108 binop(Iop_Sub32,
2109 mkexpr(amtT),
2110 mkU32(1)
2112 mkU32(31)
2116 mkU32(1)
2118 mkexpr(oldC)
2122 assign(
2123 *res,
2124 IRExpr_ITE(
2125 binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
2126 binop(Iop_Or32,
2127 binop(Iop_Shr32,
2128 mkexpr(rMt),
2129 unop(Iop_32to8, mkexpr(amt5T))
2131 binop(Iop_Shl32,
2132 mkexpr(rMt),
2133 unop(Iop_32to8,
2134 binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
2138 mkexpr(rMt)
2141 DIS(buf, "r%u, ROR r#%u", rM, rS);
/* Generate an expression corresponding to the immediate-shift case of
   a shifter operand.  This is used both for ARM and Thumb2.

   Bind it to a temporary, and return that via *res.  If newC is
   non-NULL, also compute a value for the shifter's carry out (in the
   LSB of a word), bind it to a temporary, and return that via *shco.

   Generates GETs from the guest state and is therefore not safe to
   use once we start doing PUTs to it, for any given instruction.

   'how' is encoded thusly:
      00b LSL,  01b LSR,  10b ASR,  11b ROR
   Most but not all ARM and Thumb integer insns use this encoding.
   Be careful to ensure the right value is passed here.
*/
static void compute_result_and_C_after_shift_by_imm5 (
        /*OUT*/HChar* buf,
        /*OUT*/IRTemp* res,
        /*OUT*/IRTemp* newC,
        IRTemp rMt,       /* reg to shift */
        UInt how,         /* what kind of shift */
        UInt shift_amt,   /* shift amount (0..31) */
        UInt rM           /* only for debug printing */
     )
{
   vassert(shift_amt < 32);
   vassert(how < 4);

   switch (how) {

      case 0: /* LSL */
         compute_result_and_C_after_LSL_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 1: /* LSR */
         compute_result_and_C_after_LSR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 2: /* ASR */
         compute_result_and_C_after_ASR_by_imm5(
            buf, res, newC, rMt, shift_amt, rM
         );
         break;

      case 3:
         /* A shift amount of 0 with 'how' == ROR encodes RRX, not a
            rotate by zero. */
         if (shift_amt == 0) {
            IRTemp oldcT = newTemp(Ity_I32);
            // rotate right 1 bit through carry (?)
            // RRX -- described at ARM ARM A5-17
            // res  = (oldC << 31) | (Rm >>u 1)
            // newC = Rm[0]
            if (newC) {
               assign( *newC,
                       binop(Iop_And32, mkexpr(rMt), mkU32(1)));
            }
            assign( oldcT, mk_armg_calculate_flag_c() );
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
            DIS(buf, "r%u, RRX", rM);
         } else {
            // rotate right in range 1..31
            // res  = Rm `ror` shift_amt
            // newC = Rm[shift_amt - 1]
            vassert(shift_amt >= 1 && shift_amt <= 31);
            if (newC) {
               assign( *newC,
                       binop(Iop_And32,
                             binop(Iop_Shr32, mkexpr(rMt),
                                              mkU8(shift_amt - 1)),
                             mkU32(1)));
            }
            assign( *res,
                    binop(Iop_Or32,
                          binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
                          binop(Iop_Shl32, mkexpr(rMt),
                                           mkU8(32-shift_amt))));
            DIS(buf, "r%u, ROR #%u", rM, shift_amt);
         }
         break;

      default:
         /*NOTREACHED*/
         vassert(0);
   }
}
2238 /* Generate an expression corresponding to the register-shift case of
2239 a shifter operand. This is used both for ARM and Thumb2.
2241 Bind it to a temporary, and return that via *res. If newC is
2242 non-NULL, also compute a value for the shifter's carry out (in the
2243 LSB of a word), bind it to a temporary, and return that via *shco.
2245 Generates GETs from the guest state and is therefore not safe to
2246 use once we start doing PUTs to it, for any given instruction.
2248 'how' is encoded thusly:
2249 00b LSL, 01b LSR, 10b ASR, 11b ROR
2250 Most but not all ARM and Thumb integer insns use this encoding.
2251 Be careful to ensure the right value is passed here.
2253 static void compute_result_and_C_after_shift_by_reg (
2254 /*OUT*/HChar* buf,
2255 /*OUT*/IRTemp* res,
2256 /*OUT*/IRTemp* newC,
2257 IRTemp rMt, /* reg to shift */
2258 UInt how, /* what kind of shift */
2259 IRTemp rSt, /* shift amount */
2260 UInt rM, /* only for debug printing */
2261 UInt rS /* only for debug printing */
2264 vassert(how < 4);
2265 switch (how) {
2266 case 0: { /* LSL */
2267 compute_result_and_C_after_LSL_by_reg(
2268 buf, res, newC, rMt, rSt, rM, rS
2270 break;
2272 case 1: { /* LSR */
2273 compute_result_and_C_after_LSR_by_reg(
2274 buf, res, newC, rMt, rSt, rM, rS
2276 break;
2278 case 2: { /* ASR */
2279 compute_result_and_C_after_ASR_by_reg(
2280 buf, res, newC, rMt, rSt, rM, rS
2282 break;
2284 case 3: { /* ROR */
2285 compute_result_and_C_after_ROR_by_reg(
2286 buf, res, newC, rMt, rSt, rM, rS
2288 break;
2290 default:
2291 /*NOTREACHED*/
2292 vassert(0);
/* Generate an expression corresponding to a shifter_operand, bind it
   to a temporary, and return that via *shop.  If shco is non-NULL,
   also compute a value for the shifter's carry out (in the LSB of a
   word), bind it to a temporary, and return that via *shco.

   If for some reason we can't come up with a shifter operand (missing
   case?  not really a shifter operand?) return False.

   Generates GETs from the guest state and is therefore not safe to
   use once we start doing PUTs to it, for any given instruction.

   For ARM insns only; not for Thumb.
*/
static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
                                 /*OUT*/IRTemp* shop,
                                 /*OUT*/IRTemp* shco,
                                 /*OUT*/HChar* buf )
{
   UInt insn_4 = (insn_11_0 >> 4) & 1;
   UInt insn_7 = (insn_11_0 >> 7) & 1;
   vassert(insn_25 <= 0x1);
   vassert(insn_11_0 <= 0xFFF);

   /* Caller must hand us fresh (unbound) output temporaries. */
   vassert(shop && *shop == IRTemp_INVALID);
   *shop = newTemp(Ity_I32);

   if (shco) {
      vassert(*shco == IRTemp_INVALID);
      *shco = newTemp(Ity_I32);
   }

   /* 32-bit immediate */

   if (insn_25 == 1) {
      /* immediate: (7:0) rotated right by 2 * (11:8) */
      UInt imm = (insn_11_0 >> 0) & 0xFF;
      UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
      vassert(rot <= 30);
      imm = ROR32(imm, rot);
      if (shco) {
         if (rot == 0) {
            /* No rotation: carry out is the existing carry flag. */
            assign( *shco, mk_armg_calculate_flag_c() );
         } else {
            /* Otherwise carry out is bit 31 of the rotated value. */
            assign( *shco, mkU32( (imm >> 31) & 1 ) );
         }
      }
      DIS(buf, "#0x%x", imm);
      assign( *shop, mkU32(imm) );
      return True;
   }

   /* Shift/rotate by immediate */

   if (insn_25 == 0 && insn_4 == 0) {
      /* Rm (3:0) shifted (6:5) by immediate (11:7) */
      UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
      UInt rM        = (insn_11_0 >> 0) & 0xF;
      UInt how       = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      assign(rMt, getIRegA(rM));

      vassert(shift_amt <= 31);

      compute_result_and_C_after_shift_by_imm5(
         buf, shop, shco, rMt, how, shift_amt, rM
      );
      return True;
   }

   /* Shift/rotate by register */
   if (insn_25 == 0 && insn_4 == 1) {
      /* Rm (3:0) shifted (6:5) by Rs (11:8) */
      UInt rM  = (insn_11_0 >> 0) & 0xF;
      UInt rS  = (insn_11_0 >> 8) & 0xF;
      UInt how = (insn_11_0 >> 5) & 3;
      /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
      IRTemp rMt = newTemp(Ity_I32);
      IRTemp rSt = newTemp(Ity_I32);

      if (insn_7 == 1)
         return False; /* not really a shifter operand */

      assign(rMt, getIRegA(rM));
      assign(rSt, getIRegA(rS));

      compute_result_and_C_after_shift_by_reg(
         buf, shop, shco, rMt, how, rSt, rM, rS
      );
      return True;
   }

   vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
   return False;
}
2394 /* ARM only */
2395 static
2396 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2397 /*OUT*/HChar* buf )
2399 vassert(rN < 16);
2400 vassert(bU < 2);
2401 vassert(imm12 < 0x1000);
2402 HChar opChar = bU == 1 ? '+' : '-';
2403 DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2404 return
2405 binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2406 getIRegA(rN),
2407 mkU32(imm12) );
/* ARM only.  Build the effective-address expression
   rN +/- (rM shifted by sh2/imm5), writing disassembly text to buf.
   NB: This is "DecodeImmShift" in newer versions of the ARM ARM.
*/
static
IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
                                          UInt sh2, UInt imm5,
                                          /*OUT*/HChar* buf )
{
   vassert(rN < 16);
   vassert(bU < 2);
   vassert(rM < 16);
   vassert(sh2 < 4);
   vassert(imm5 < 32);
   HChar   opChar = bU == 1 ? '+' : '-';
   IRExpr* index  = NULL;
   switch (sh2) {
      case 0: /* LSL */
         /* imm5 can be in the range 0 .. 31 inclusive. */
         index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
         DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
         break;
      case 1: /* LSR */
         if (imm5 == 0) {
            /* imm5 == 0 encodes LSR #32, whose result is 0. */
            index = mkU32(0);
            vassert(0); // ATC  (this encoding has never been seen)
         } else {
            index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, LSR #%u]",
                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 2: /* ASR */
         /* Doesn't this just mean that the behaviour with imm5 == 0
            is the same as if it had been 31 ? */
         if (imm5 == 0) {
            /* imm5 == 0 encodes ASR #32: every bit becomes the sign. */
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
            vassert(0); // ATC  (this encoding has never been seen)
         } else {
            index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
         }
         DIS(buf, "[r%u, %cr%u, ASR #%u]",
                  rN, opChar, rM, imm5 == 0 ? 32 : imm5);
         break;
      case 3: /* ROR or RRX */
         if (imm5 == 0) {
            /* ROR #0 encodes RRX: (C << 31) | (Rm >>u 1) */
            IRTemp rmT    = newTemp(Ity_I32);
            IRTemp cflagT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            assign(cflagT, mk_armg_calculate_flag_c());
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
            DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
         } else {
            /* ROR #imm5, 1 .. 31 */
            IRTemp rmT = newTemp(Ity_I32);
            assign(rmT, getIRegA(rM));
            vassert(imm5 >= 1 && imm5 <= 31);
            index = binop(Iop_Or32,
                          binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
                          binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
            DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
         }
         break;
      default:
         vassert(0);
   }
   vassert(index);
   return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
                getIRegA(rN), index);
}
2483 /* ARM only */
2484 static
2485 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2486 /*OUT*/HChar* buf )
2488 vassert(rN < 16);
2489 vassert(bU < 2);
2490 vassert(imm8 < 0x100);
2491 HChar opChar = bU == 1 ? '+' : '-';
2492 DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2493 return
2494 binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2495 getIRegA(rN),
2496 mkU32(imm8) );
2500 /* ARM only */
2501 static
2502 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2503 /*OUT*/HChar* buf )
2505 vassert(rN < 16);
2506 vassert(bU < 2);
2507 vassert(rM < 16);
2508 HChar opChar = bU == 1 ? '+' : '-';
2509 IRExpr* index = getIRegA(rM);
2510 DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2511 return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2512 getIRegA(rN), index);
/* irRes :: Ity_I32 holds a floating point comparison result encoded
   as an IRCmpF64Result.  Generate code to convert it to an
   ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
   Assign a new temp to hold that value, and return the temp. */
static
IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
{
   IRTemp ix    = newTemp(Ity_I32);
   IRTemp termL = newTemp(Ity_I32);
   IRTemp termR = newTemp(Ity_I32);
   IRTemp nzcv  = newTemp(Ity_I32);

   /* This is where the fun starts.  We have to convert 'irRes' from
      an IR-convention return result (IRCmpF64Result) to an
      ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
      4 bits of 'nzcv'. */
   /* Map compare result from IR to ARM(nzcv) */
   /*
      FP cmp result | IR   | ARM(nzcv)
      --------------------------------
      UN              0x45   0011
      LT              0x01   1000
      GT              0x00   0010
      EQ              0x40   0110
   */
   /* Now since you're probably wondering WTF ..

      ix fishes the useful bits out of the IR value, bits 6 and 0, and
      places them side by side, giving a number which is 0, 1, 2 or 3.

      termL is a sequence cooked up by GNU superopt.  It converts ix
         into an almost correct value NZCV value (incredibly), except
         for the case of UN, where it produces 0100 instead of the
         required 0011.

      termR is therefore a correction term, also computed from ix.  It
         is 1 in the UN case and 0 for LT, GT and EQ.  Hence, to get
         the final correct value, we subtract termR from termL.

      Don't take my word for it.  There's a test program at the bottom
      of this file, to try this out with.
   */
   /* ix = ((irRes >> 5) & 3) | (irRes & 1)
      giving GT -> 0, LT -> 1, EQ -> 2, UN -> 3. */
   assign(
      ix,
      binop(Iop_Or32,
            binop(Iop_And32,
                  binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
                  mkU32(3)),
            binop(Iop_And32, mkexpr(irRes), mkU32(1))));

   /* termL = ((((ix ^ 1) << 30) - 1) >> 29) + 1 */
   assign(
      termL,
      binop(Iop_Add32,
            binop(Iop_Shr32,
                  binop(Iop_Sub32,
                        binop(Iop_Shl32,
                              binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
                              mkU8(30)),
                        mkU32(1)),
                  mkU8(29)),
            mkU32(1)));

   /* termR = ix & (ix >> 1) & 1  -- nonzero only for ix == 3 (UN) */
   assign(
      termR,
      binop(Iop_And32,
            binop(Iop_And32,
                  mkexpr(ix),
                  binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
            mkU32(1)));

   assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
   return nzcv;
}
2591 /* Thumb32 only. This is "ThumbExpandImm" in the ARM ARM. If
2592 updatesC is non-NULL, a boolean is written to it indicating whether
2593 or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
2595 static UInt thumbExpandImm ( Bool* updatesC,
2596 UInt imm1, UInt imm3, UInt imm8 )
2598 vassert(imm1 < (1<<1));
2599 vassert(imm3 < (1<<3));
2600 vassert(imm8 < (1<<8));
2601 UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
2602 UInt abcdefgh = imm8;
2603 UInt lbcdefgh = imm8 | 0x80;
2604 if (updatesC) {
2605 *updatesC = i_imm3_a >= 8;
2607 switch (i_imm3_a) {
2608 case 0: case 1:
2609 return abcdefgh;
2610 case 2: case 3:
2611 return (abcdefgh << 16) | abcdefgh;
2612 case 4: case 5:
2613 return (abcdefgh << 24) | (abcdefgh << 8);
2614 case 6: case 7:
2615 return (abcdefgh << 24) | (abcdefgh << 16)
2616 | (abcdefgh << 8) | abcdefgh;
2617 case 8 ... 31:
2618 return lbcdefgh << (32 - i_imm3_a);
2619 default:
2620 break;
2622 /*NOTREACHED*/vassert(0);
2626 /* Version of thumbExpandImm where we simply feed it the
2627 instruction halfwords (the lowest addressed one is I0). */
2628 static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
2629 UShort i0s, UShort i1s )
2631 UInt i0 = (UInt)i0s;
2632 UInt i1 = (UInt)i1s;
2633 UInt imm1 = SLICE_UInt(i0,10,10);
2634 UInt imm3 = SLICE_UInt(i1,14,12);
2635 UInt imm8 = SLICE_UInt(i1,7,0);
2636 return thumbExpandImm(updatesC, imm1, imm3, imm8);
/* Thumb16 only.  Given the firstcond and mask fields from an IT
   instruction, compute the 32-bit ITSTATE value implied, as described
   in libvex_guest_arm.h.  This is not the ARM ARM representation.
   Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
   disassembly printing.  Returns False if firstcond or mask
   denote something invalid.

   The number and conditions for the instructions to be
   conditionalised depend on firstcond and mask:

   mask      cond 1    cond 2      cond 3      cond 4

   1000      fc[3:0]
   x100      fc[3:0]   fc[3:1]:x
   xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
   xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z

   The condition fields are assembled in *itstate backwards (cond 4 at
   the top, cond 1 at the bottom).  Conditions are << 4'd and then
   ^0xE'd, and those fields that correspond to instructions in the IT
   block are tagged with a 1 bit.
*/
static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
                              /*OUT*/HChar* ch1,
                              /*OUT*/HChar* ch2,
                              /*OUT*/HChar* ch3,
                              UInt firstcond, UInt mask )
{
   vassert(firstcond <= 0xF);
   vassert(mask <= 0xF);
   *itstate = 0;
   *ch1 = *ch2 = *ch3 = '.';
   if (mask == 0)
      return False; /* the logic below actually ensures this anyway,
                       but clearer to make it explicit. */
   if (firstcond == 0xF)
      return False; /* NV is not allowed */
   if (firstcond == 0xE && popcount32(mask) != 1)
      return False; /* if firstcond is AL then all the rest must be too */

   UInt m3 = (mask >> 3) & 1;
   UInt m2 = (mask >> 2) & 1;
   UInt m1 = (mask >> 1) & 1;
   UInt m0 = (mask >> 0) & 1;

   /* fc = condition byte for an insn inside the IT block;
      ni = condition byte for a slot with no insn (always-AL, untagged). */
   UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
   UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;

   /* mask == 1000b: just one conditionalised insn. */
   if (m3 == 1 && (m2|m1|m0) == 0) {
      *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
      *itstate ^= 0xE0E0E0E0;   /* ^0xE each cond, per libvex_guest_arm.h */
      return True;
   }

   /* mask == x100b: two insns; x gives the 2nd insn's cond[0]. */
   if (m2 == 1 && (m1|m0) == 0) {
      *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   /* mask == xy10b: three insns. */
   if (m1 == 1 && m0 == 0) {
      *itstate = (ni << 24)
                 | (setbit32(fc, 4, m2) << 16)
                 | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   /* mask == xyz1b: four insns. */
   if (m0 == 1) {
      *itstate = (setbit32(fc, 4, m1) << 24)
                 | (setbit32(fc, 4, m2) << 16)
                 | (setbit32(fc, 4, m3) << 8) | fc;
      *itstate ^= 0xE0E0E0E0;
      *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
      *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
      *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
      return True;
   }

   return False;
}
2726 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2727 Chapter 7 Section 1. */
2728 static IRTemp gen_BITREV ( IRTemp x0 )
2730 IRTemp x1 = newTemp(Ity_I32);
2731 IRTemp x2 = newTemp(Ity_I32);
2732 IRTemp x3 = newTemp(Ity_I32);
2733 IRTemp x4 = newTemp(Ity_I32);
2734 IRTemp x5 = newTemp(Ity_I32);
2735 UInt c1 = 0x55555555;
2736 UInt c2 = 0x33333333;
2737 UInt c3 = 0x0F0F0F0F;
2738 UInt c4 = 0x00FF00FF;
2739 UInt c5 = 0x0000FFFF;
2740 assign(x1,
2741 binop(Iop_Or32,
2742 binop(Iop_Shl32,
2743 binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2744 mkU8(1)),
2745 binop(Iop_Shr32,
2746 binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2747 mkU8(1))
2749 assign(x2,
2750 binop(Iop_Or32,
2751 binop(Iop_Shl32,
2752 binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2753 mkU8(2)),
2754 binop(Iop_Shr32,
2755 binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2756 mkU8(2))
2758 assign(x3,
2759 binop(Iop_Or32,
2760 binop(Iop_Shl32,
2761 binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2762 mkU8(4)),
2763 binop(Iop_Shr32,
2764 binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2765 mkU8(4))
2767 assign(x4,
2768 binop(Iop_Or32,
2769 binop(Iop_Shl32,
2770 binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2771 mkU8(8)),
2772 binop(Iop_Shr32,
2773 binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2774 mkU8(8))
2776 assign(x5,
2777 binop(Iop_Or32,
2778 binop(Iop_Shl32,
2779 binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2780 mkU8(16)),
2781 binop(Iop_Shr32,
2782 binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2783 mkU8(16))
2785 return x5;
/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   0:1:2:3 (aka byte-swap). */
static IRTemp gen_REV ( IRTemp arg )
{
   IRTemp res = newTemp(Ity_I32);
   /* Each of the four Or32 operands moves one byte into place:
      byte 0 -> 3, byte 1 -> 2, byte 2 -> 1, byte 3 -> 0. */
   assign(res,
          binop(Iop_Or32,
                binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
          binop(Iop_Or32,
                binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
                                 mkU32(0x00FF0000)),
          binop(Iop_Or32,
                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
                                       mkU32(0x0000FF00)),
                binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
                                       mkU32(0x000000FF) )
          ))));
   return res;
}
/* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
   2:3:0:1 (swap within lo and hi halves). */
static IRTemp gen_REV16 ( IRTemp arg )
{
   IRTemp res = newTemp(Ity_I32);
   /* Move even bytes up and odd bytes down within each 16-bit half. */
   assign(res,
          binop(Iop_Or32,
                binop(Iop_And32,
                      binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
                      mkU32(0xFF00FF00)),
                binop(Iop_And32,
                      binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
                      mkU32(0x00FF00FF))));
   return res;
}
2827 /*------------------------------------------------------------*/
2828 /*--- Advanced SIMD (NEON) instructions ---*/
2829 /*------------------------------------------------------------*/
2831 /*------------------------------------------------------------*/
2832 /*--- NEON data processing ---*/
2833 /*------------------------------------------------------------*/
2835 /* For all NEON DP ops, we use the normal scheme to handle conditional
2836 writes to registers -- pass in condT and hand that on to the
2837 put*Reg functions. In ARM mode condT is always IRTemp_INVALID
2838 since NEON is unconditional for ARM. In Thumb mode condT is
2839 derived from the ITSTATE shift register in the normal way. */
2841 static
2842 UInt get_neon_d_regno(UInt theInstr)
2844 UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2845 if (theInstr & 0x40) {
2846 if (x & 1) {
2847 x = x + 0x100;
2848 } else {
2849 x = x >> 1;
2852 return x;
2855 static
2856 UInt get_neon_n_regno(UInt theInstr)
2858 UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
2859 if (theInstr & 0x40) {
2860 if (x & 1) {
2861 x = x + 0x100;
2862 } else {
2863 x = x >> 1;
2866 return x;
2869 static
2870 UInt get_neon_m_regno(UInt theInstr)
2872 UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2873 if (theInstr & 0x40) {
2874 if (x & 1) {
2875 x = x + 0x100;
2876 } else {
2877 x = x >> 1;
2880 return x;
/* VEXT: extract a contiguous byte window from the concatenation of
   two D or Q registers, starting at byte imm4 of the low register. */
static
Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
{
   UInt dreg = get_neon_d_regno(theInstr);
   UInt mreg = get_neon_m_regno(theInstr);
   UInt nreg = get_neon_n_regno(theInstr);
   UInt imm4 = (theInstr >> 8) & 0xf;
   UInt Q = (theInstr >> 6) & 1;
   HChar reg_t = Q ? 'q' : 'd';

   if (Q) {
      putQReg(dreg, triop(Iop_SliceV128, /*hiV128*/getQReg(mreg),
                          /*loV128*/getQReg(nreg), mkU8(imm4)), condT);
   } else {
      putDRegI64(dreg, triop(Iop_Slice64, /*hiI64*/getDRegI64(mreg),
                             /*loI64*/getDRegI64(nreg), mkU8(imm4)), condT);
   }
   DIP("vext.8 %c%u, %c%u, %c%u, #%u\n", reg_t, dreg, reg_t, nreg,
                                         reg_t, mreg, imm4);
   return True;
}
/* Generate specific vector FP binary ops, possibly with a fake
   rounding mode as required by the primop. */
static
IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
{
   switch (op) {
      /* These primops take a rounding-mode argument (triop form),
         so supply a fake one. */
      case Iop_Add32Fx4:
      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
         return triop(op, get_FAKE_roundingmode(), argL, argR );
      /* Ordinary two-argument primops; no rounding mode needed. */
      case Iop_Add32x4: case Iop_Add16x8:
      case Iop_Sub32x4: case Iop_Sub16x8:
      case Iop_Mul32x4: case Iop_Mul16x8:
      case Iop_Mul32x2: case Iop_Mul16x4:
      case Iop_Add32Fx2:
      case Iop_Sub32Fx2:
      case Iop_Mul32Fx2:
      case Iop_PwAdd32Fx2:
         return binop(op, argL, argR);
      default:
         /* Unhandled op: print it and assert, so new users of this
            helper are forced to add their op explicitly. */
         ppIROp(op);
         vassert(0);
   }
}
/* VTBL, VTBX.  Table lookup of bytes: each byte of Dm selects a byte
   from the table d(nreg) .. d(nreg+len).  op == 0 is VTBL (out-of-range
   indices give 0); op == 1 is VTBX (out-of-range indices leave the
   destination byte unchanged). */
static
Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
{
   UInt op = (theInstr >> 6) & 1;
   UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
   UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
   UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
   UInt len = (theInstr >> 8) & 3;
   Int i;
   IROp cmp;
   ULong imm;
   IRTemp arg_l;
   IRTemp old_mask, new_mask, cur_mask;
   IRTemp old_res, new_res;
   IRTemp old_arg, new_arg;

   if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
      return False;    /* odd-register tag from get_neon_*_regno */
   if (nreg + len > 31)
      return False;    /* table would run off the end of the D bank */

   cmp = Iop_CmpGT8Ux8;

   old_mask = newTemp(Ity_I64);
   old_res = newTemp(Ity_I64);
   old_arg = newTemp(Ity_I64);
   assign(old_mask, mkU64(0));
   assign(old_res, mkU64(0));
   assign(old_arg, getDRegI64(mreg));
   /* imm = eight copies of the byte 8 (one table-register's worth of
      indices), used both to range-check and to rebase indices. */
   imm = 8;
   imm = (imm << 8) | imm;
   imm = (imm << 16) | imm;
   imm = (imm << 32) | imm;

   /* Process the table one D register at a time.  In each round,
      cur_mask marks the index bytes that fall inside this table
      register (0 <= idx < 8); those lanes are looked up with
      Iop_Perm8x8 and merged into the accumulated result, and the
      indices are then rebased by 8 for the next round. */
   for (i = 0; i <= len; i++) {
      arg_l = newTemp(Ity_I64);
      new_mask = newTemp(Ity_I64);
      cur_mask = newTemp(Ity_I64);
      new_res = newTemp(Ity_I64);
      new_arg = newTemp(Ity_I64);
      assign(arg_l, getDRegI64(nreg+i));
      assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
      assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
      assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
      assign(new_res, binop(Iop_Or64,
                            mkexpr(old_res),
                            binop(Iop_And64,
                                  binop(Iop_Perm8x8,
                                        mkexpr(arg_l),
                                        binop(Iop_And64,
                                              mkexpr(old_arg),
                                              mkexpr(cur_mask))),
                                  mkexpr(cur_mask))));

      old_arg = new_arg;
      old_mask = new_mask;
      old_res = new_res;
   }
   if (op) {
      /* VTBX: bytes whose index was never in range keep their old
         destination value. */
      new_res = newTemp(Ity_I64);
      assign(new_res, binop(Iop_Or64,
                            binop(Iop_And64,
                                  getDRegI64(dreg),
                                  unop(Iop_Not64, mkexpr(old_mask))),
                            mkexpr(old_res)));
      old_res = new_res;
   }

   putDRegI64(dreg, mkexpr(old_res), condT);
   DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
   if (len > 0) {
      DIP("d%u-d%u", nreg, nreg + len);
   } else {
      DIP("d%u", nreg);
   }
   DIP("}, d%u\n", mreg);
   return True;
}
/* VDUP (scalar): replicate one element of Dm across all lanes of the
   destination D or Q register.  The element size and index are both
   encoded in imm4 (trailing-one position gives the size). */
static
Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
{
   UInt Q = (theInstr >> 6) & 1;
   UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
   UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
   UInt imm4 = (theInstr >> 16) & 0xF;
   UInt index;
   UInt size;
   IRTemp arg_m;
   IRTemp res;
   IROp op, op2;

   if ((imm4 == 0) || (imm4 == 8))
      return False;                      /* UNDEFINED encodings */
   if ((Q == 1) && ((dreg & 1) == 1))
      return False;                      /* Q dest must be even D pair */
   if (Q)
      dreg >>= 1;
   arg_m = newTemp(Ity_I64);
   assign(arg_m, getDRegI64(mreg));
   if (Q)
      res = newTemp(Ity_V128);
   else
      res = newTemp(Ity_I64);
   /* imm4 = xxx1 -> 8-bit, xx10 -> 16-bit, x100 -> 32-bit element;
      the bits above the trailing 1 give the lane index. */
   if ((imm4 & 1) == 1) {
      op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
      op2 = Iop_GetElem8x8;
      index = imm4 >> 1;
      size = 8;
   } else if ((imm4 & 3) == 2) {
      op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
      op2 = Iop_GetElem16x4;
      index = imm4 >> 2;
      size = 16;
   } else if ((imm4 & 7) == 4) {
      op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
      op2 = Iop_GetElem32x2;
      index = imm4 >> 3;
      size = 32;
   } else {
      return False; // can this ever happen?
   }
   assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
   if (Q) {
      putQReg(dreg, mkexpr(res), condT);
   } else {
      putDRegI64(dreg, mkexpr(res), condT);
   }
   DIP("vdup.%u %c%u, d%u[%u]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
   return True;
}
3064 /* A7.4.1 Three registers of the same length */
3065 static
3066 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
3068 /* In paths where this returns False, indicating a non-decodable
3069 instruction, there may still be some IR assignments to temporaries
3070 generated. This is inconvenient but harmless, and the post-front-end
3071 IR optimisation pass will just remove them anyway. So there's no
3072 effort made here to tidy it up.
3074 UInt Q = (theInstr >> 6) & 1;
3075 UInt dreg = get_neon_d_regno(theInstr);
3076 UInt nreg = get_neon_n_regno(theInstr);
3077 UInt mreg = get_neon_m_regno(theInstr);
3078 UInt A = (theInstr >> 8) & 0xF;
3079 UInt B = (theInstr >> 4) & 1;
3080 UInt C = (theInstr >> 20) & 0x3;
3081 UInt U = (theInstr >> 24) & 1;
3082 UInt size = C;
3084 IRTemp arg_n;
3085 IRTemp arg_m;
3086 IRTemp res;
3088 if (Q) {
3089 arg_n = newTemp(Ity_V128);
3090 arg_m = newTemp(Ity_V128);
3091 res = newTemp(Ity_V128);
3092 assign(arg_n, getQReg(nreg));
3093 assign(arg_m, getQReg(mreg));
3094 } else {
3095 arg_n = newTemp(Ity_I64);
3096 arg_m = newTemp(Ity_I64);
3097 res = newTemp(Ity_I64);
3098 assign(arg_n, getDRegI64(nreg));
3099 assign(arg_m, getDRegI64(mreg));
3102 switch(A) {
3103 case 0:
3104 if (B == 0) {
3105 /* VHADD */
3106 ULong imm = 0;
3107 IRExpr *imm_val;
3108 IROp addOp;
3109 IROp andOp;
3110 IROp shOp;
3111 HChar regType = Q ? 'q' : 'd';
3113 if (size == 3)
3114 return False;
3115 switch(size) {
3116 case 0: imm = 0x101010101010101LL; break;
3117 case 1: imm = 0x1000100010001LL; break;
3118 case 2: imm = 0x100000001LL; break;
3119 default: vassert(0);
3121 if (Q) {
3122 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3123 andOp = Iop_AndV128;
3124 } else {
3125 imm_val = mkU64(imm);
3126 andOp = Iop_And64;
3128 if (U) {
3129 switch(size) {
3130 case 0:
3131 addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3132 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3133 break;
3134 case 1:
3135 addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3136 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3137 break;
3138 case 2:
3139 addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3140 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3141 break;
3142 default:
3143 vassert(0);
3145 } else {
3146 switch(size) {
3147 case 0:
3148 addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3149 shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3150 break;
3151 case 1:
3152 addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3153 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3154 break;
3155 case 2:
3156 addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3157 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3158 break;
3159 default:
3160 vassert(0);
3163 assign(res,
3164 binop(addOp,
3165 binop(addOp,
3166 binop(shOp, mkexpr(arg_m), mkU8(1)),
3167 binop(shOp, mkexpr(arg_n), mkU8(1))),
3168 binop(shOp,
3169 binop(addOp,
3170 binop(andOp, mkexpr(arg_m), imm_val),
3171 binop(andOp, mkexpr(arg_n), imm_val)),
3172 mkU8(1))));
3173 DIP("vhadd.%c%d %c%u, %c%u, %c%u\n",
3174 U ? 'u' : 's', 8 << size, regType,
3175 dreg, regType, nreg, regType, mreg);
3176 } else {
3177 /* VQADD */
3178 IROp op, op2;
3179 IRTemp tmp;
3180 HChar reg_t = Q ? 'q' : 'd';
3181 if (Q) {
3182 switch (size) {
3183 case 0:
3184 op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
3185 op2 = Iop_Add8x16;
3186 break;
3187 case 1:
3188 op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
3189 op2 = Iop_Add16x8;
3190 break;
3191 case 2:
3192 op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
3193 op2 = Iop_Add32x4;
3194 break;
3195 case 3:
3196 op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
3197 op2 = Iop_Add64x2;
3198 break;
3199 default:
3200 vassert(0);
3202 } else {
3203 switch (size) {
3204 case 0:
3205 op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
3206 op2 = Iop_Add8x8;
3207 break;
3208 case 1:
3209 op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
3210 op2 = Iop_Add16x4;
3211 break;
3212 case 2:
3213 op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
3214 op2 = Iop_Add32x2;
3215 break;
3216 case 3:
3217 op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
3218 op2 = Iop_Add64;
3219 break;
3220 default:
3221 vassert(0);
3224 if (Q) {
3225 tmp = newTemp(Ity_V128);
3226 } else {
3227 tmp = newTemp(Ity_I64);
3229 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3230 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3231 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3232 DIP("vqadd.%c%d %c%u %c%u, %c%u\n",
3233 U ? 'u' : 's',
3234 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3236 break;
3237 case 1:
3238 if (B == 0) {
3239 /* VRHADD */
3240 /* VRHADD C, A, B ::=
3241 C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
3242 IROp shift_op, add_op;
3243 IRTemp cc;
3244 ULong one = 1;
3245 HChar reg_t = Q ? 'q' : 'd';
3246 switch (size) {
3247 case 0: one = (one << 8) | one; /* fall through */
3248 case 1: one = (one << 16) | one; /* fall through */
3249 case 2: one = (one << 32) | one; break;
3250 case 3: return False;
3251 default: vassert(0);
3253 if (Q) {
3254 switch (size) {
3255 case 0:
3256 shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
3257 add_op = Iop_Add8x16;
3258 break;
3259 case 1:
3260 shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
3261 add_op = Iop_Add16x8;
3262 break;
3263 case 2:
3264 shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
3265 add_op = Iop_Add32x4;
3266 break;
3267 case 3:
3268 return False;
3269 default:
3270 vassert(0);
3272 } else {
3273 switch (size) {
3274 case 0:
3275 shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
3276 add_op = Iop_Add8x8;
3277 break;
3278 case 1:
3279 shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
3280 add_op = Iop_Add16x4;
3281 break;
3282 case 2:
3283 shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
3284 add_op = Iop_Add32x2;
3285 break;
3286 case 3:
3287 return False;
3288 default:
3289 vassert(0);
3292 if (Q) {
3293 cc = newTemp(Ity_V128);
3294 assign(cc, binop(shift_op,
3295 binop(add_op,
3296 binop(add_op,
3297 binop(Iop_AndV128,
3298 mkexpr(arg_n),
3299 binop(Iop_64HLtoV128,
3300 mkU64(one),
3301 mkU64(one))),
3302 binop(Iop_AndV128,
3303 mkexpr(arg_m),
3304 binop(Iop_64HLtoV128,
3305 mkU64(one),
3306 mkU64(one)))),
3307 binop(Iop_64HLtoV128,
3308 mkU64(one),
3309 mkU64(one))),
3310 mkU8(1)));
3311 assign(res, binop(add_op,
3312 binop(add_op,
3313 binop(shift_op,
3314 mkexpr(arg_n),
3315 mkU8(1)),
3316 binop(shift_op,
3317 mkexpr(arg_m),
3318 mkU8(1))),
3319 mkexpr(cc)));
3320 } else {
3321 cc = newTemp(Ity_I64);
3322 assign(cc, binop(shift_op,
3323 binop(add_op,
3324 binop(add_op,
3325 binop(Iop_And64,
3326 mkexpr(arg_n),
3327 mkU64(one)),
3328 binop(Iop_And64,
3329 mkexpr(arg_m),
3330 mkU64(one))),
3331 mkU64(one)),
3332 mkU8(1)));
3333 assign(res, binop(add_op,
3334 binop(add_op,
3335 binop(shift_op,
3336 mkexpr(arg_n),
3337 mkU8(1)),
3338 binop(shift_op,
3339 mkexpr(arg_m),
3340 mkU8(1))),
3341 mkexpr(cc)));
3343 DIP("vrhadd.%c%d %c%u, %c%u, %c%u\n",
3344 U ? 'u' : 's',
3345 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3346 } else {
3347 if (U == 0) {
3348 switch(C) {
3349 case 0: {
3350 /* VAND */
3351 HChar reg_t = Q ? 'q' : 'd';
3352 if (Q) {
3353 assign(res, binop(Iop_AndV128, mkexpr(arg_n),
3354 mkexpr(arg_m)));
3355 } else {
3356 assign(res, binop(Iop_And64, mkexpr(arg_n),
3357 mkexpr(arg_m)));
3359 DIP("vand %c%u, %c%u, %c%u\n",
3360 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3361 break;
3363 case 1: {
3364 /* VBIC */
3365 HChar reg_t = Q ? 'q' : 'd';
3366 if (Q) {
3367 assign(res, binop(Iop_AndV128,mkexpr(arg_n),
3368 unop(Iop_NotV128, mkexpr(arg_m))));
3369 } else {
3370 assign(res, binop(Iop_And64, mkexpr(arg_n),
3371 unop(Iop_Not64, mkexpr(arg_m))));
3373 DIP("vbic %c%u, %c%u, %c%u\n",
3374 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3375 break;
3377 case 2:
3378 if ( nreg != mreg) {
3379 /* VORR */
3380 HChar reg_t = Q ? 'q' : 'd';
3381 if (Q) {
3382 assign(res, binop(Iop_OrV128, mkexpr(arg_n),
3383 mkexpr(arg_m)));
3384 } else {
3385 assign(res, binop(Iop_Or64, mkexpr(arg_n),
3386 mkexpr(arg_m)));
3388 DIP("vorr %c%u, %c%u, %c%u\n",
3389 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3390 } else {
3391 /* VMOV */
3392 HChar reg_t = Q ? 'q' : 'd';
3393 assign(res, mkexpr(arg_m));
3394 DIP("vmov %c%u, %c%u\n", reg_t, dreg, reg_t, mreg);
3396 break;
3397 case 3:{
3398 /* VORN */
3399 HChar reg_t = Q ? 'q' : 'd';
3400 if (Q) {
3401 assign(res, binop(Iop_OrV128,mkexpr(arg_n),
3402 unop(Iop_NotV128, mkexpr(arg_m))));
3403 } else {
3404 assign(res, binop(Iop_Or64, mkexpr(arg_n),
3405 unop(Iop_Not64, mkexpr(arg_m))));
3407 DIP("vorn %c%u, %c%u, %c%u\n",
3408 reg_t, dreg, reg_t, nreg, reg_t, mreg);
3409 break;
3411 default:
3412 vassert(0);
3414 } else {
3415 switch(C) {
3416 case 0:
3417 /* VEOR (XOR) */
3418 if (Q) {
3419 assign(res, binop(Iop_XorV128, mkexpr(arg_n),
3420 mkexpr(arg_m)));
3421 } else {
3422 assign(res, binop(Iop_Xor64, mkexpr(arg_n),
3423 mkexpr(arg_m)));
3425 DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
3426 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3427 break;
3428 case 1:
3429 /* VBSL */
3430 if (Q) {
3431 IRTemp reg_d = newTemp(Ity_V128);
3432 assign(reg_d, getQReg(dreg));
3433 assign(res,
3434 binop(Iop_OrV128,
3435 binop(Iop_AndV128, mkexpr(arg_n),
3436 mkexpr(reg_d)),
3437 binop(Iop_AndV128,
3438 mkexpr(arg_m),
3439 unop(Iop_NotV128,
3440 mkexpr(reg_d)) ) ) );
3441 } else {
3442 IRTemp reg_d = newTemp(Ity_I64);
3443 assign(reg_d, getDRegI64(dreg));
3444 assign(res,
3445 binop(Iop_Or64,
3446 binop(Iop_And64, mkexpr(arg_n),
3447 mkexpr(reg_d)),
3448 binop(Iop_And64,
3449 mkexpr(arg_m),
3450 unop(Iop_Not64, mkexpr(reg_d)))));
3452 DIP("vbsl %c%u, %c%u, %c%u\n",
3453 Q ? 'q' : 'd', dreg,
3454 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3455 break;
3456 case 2:
3457 /* VBIT */
3458 if (Q) {
3459 IRTemp reg_d = newTemp(Ity_V128);
3460 assign(reg_d, getQReg(dreg));
3461 assign(res,
3462 binop(Iop_OrV128,
3463 binop(Iop_AndV128, mkexpr(arg_n),
3464 mkexpr(arg_m)),
3465 binop(Iop_AndV128,
3466 mkexpr(reg_d),
3467 unop(Iop_NotV128, mkexpr(arg_m)))));
3468 } else {
3469 IRTemp reg_d = newTemp(Ity_I64);
3470 assign(reg_d, getDRegI64(dreg));
3471 assign(res,
3472 binop(Iop_Or64,
3473 binop(Iop_And64, mkexpr(arg_n),
3474 mkexpr(arg_m)),
3475 binop(Iop_And64,
3476 mkexpr(reg_d),
3477 unop(Iop_Not64, mkexpr(arg_m)))));
3479 DIP("vbit %c%u, %c%u, %c%u\n",
3480 Q ? 'q' : 'd', dreg,
3481 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3482 break;
3483 case 3:
3484 /* VBIF */
3485 if (Q) {
3486 IRTemp reg_d = newTemp(Ity_V128);
3487 assign(reg_d, getQReg(dreg));
3488 assign(res,
3489 binop(Iop_OrV128,
3490 binop(Iop_AndV128, mkexpr(reg_d),
3491 mkexpr(arg_m)),
3492 binop(Iop_AndV128,
3493 mkexpr(arg_n),
3494 unop(Iop_NotV128, mkexpr(arg_m)))));
3495 } else {
3496 IRTemp reg_d = newTemp(Ity_I64);
3497 assign(reg_d, getDRegI64(dreg));
3498 assign(res,
3499 binop(Iop_Or64,
3500 binop(Iop_And64, mkexpr(reg_d),
3501 mkexpr(arg_m)),
3502 binop(Iop_And64,
3503 mkexpr(arg_n),
3504 unop(Iop_Not64, mkexpr(arg_m)))));
3506 DIP("vbif %c%u, %c%u, %c%u\n",
3507 Q ? 'q' : 'd', dreg,
3508 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3509 break;
3510 default:
3511 vassert(0);
3515 break;
3516 case 2:
3517 if (B == 0) {
3518 /* VHSUB */
3519 /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1) */
3520 ULong imm = 0;
3521 IRExpr *imm_val;
3522 IROp subOp;
3523 IROp notOp;
3524 IROp andOp;
3525 IROp shOp;
3526 if (size == 3)
3527 return False;
3528 switch(size) {
3529 case 0: imm = 0x101010101010101LL; break;
3530 case 1: imm = 0x1000100010001LL; break;
3531 case 2: imm = 0x100000001LL; break;
3532 default: vassert(0);
3534 if (Q) {
3535 imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3536 andOp = Iop_AndV128;
3537 notOp = Iop_NotV128;
3538 } else {
3539 imm_val = mkU64(imm);
3540 andOp = Iop_And64;
3541 notOp = Iop_Not64;
3543 if (U) {
3544 switch(size) {
3545 case 0:
3546 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3547 shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3548 break;
3549 case 1:
3550 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3551 shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3552 break;
3553 case 2:
3554 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3555 shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3556 break;
3557 default:
3558 vassert(0);
3560 } else {
3561 switch(size) {
3562 case 0:
3563 subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3564 shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3565 break;
3566 case 1:
3567 subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3568 shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3569 break;
3570 case 2:
3571 subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3572 shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3573 break;
3574 default:
3575 vassert(0);
3578 assign(res,
3579 binop(subOp,
3580 binop(subOp,
3581 binop(shOp, mkexpr(arg_n), mkU8(1)),
3582 binop(shOp, mkexpr(arg_m), mkU8(1))),
3583 binop(andOp,
3584 binop(andOp,
3585 unop(notOp, mkexpr(arg_n)),
3586 mkexpr(arg_m)),
3587 imm_val)));
3588 DIP("vhsub.%c%d %c%u, %c%u, %c%u\n",
3589 U ? 'u' : 's', 8 << size,
3590 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3591 mreg);
3592 } else {
3593 /* VQSUB */
3594 IROp op, op2;
3595 IRTemp tmp;
3596 if (Q) {
3597 switch (size) {
3598 case 0:
3599 op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
3600 op2 = Iop_Sub8x16;
3601 break;
3602 case 1:
3603 op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
3604 op2 = Iop_Sub16x8;
3605 break;
3606 case 2:
3607 op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
3608 op2 = Iop_Sub32x4;
3609 break;
3610 case 3:
3611 op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
3612 op2 = Iop_Sub64x2;
3613 break;
3614 default:
3615 vassert(0);
3617 } else {
3618 switch (size) {
3619 case 0:
3620 op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
3621 op2 = Iop_Sub8x8;
3622 break;
3623 case 1:
3624 op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
3625 op2 = Iop_Sub16x4;
3626 break;
3627 case 2:
3628 op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
3629 op2 = Iop_Sub32x2;
3630 break;
3631 case 3:
3632 op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
3633 op2 = Iop_Sub64;
3634 break;
3635 default:
3636 vassert(0);
3639 if (Q)
3640 tmp = newTemp(Ity_V128);
3641 else
3642 tmp = newTemp(Ity_I64);
3643 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3644 assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3645 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3646 DIP("vqsub.%c%d %c%u, %c%u, %c%u\n",
3647 U ? 'u' : 's', 8 << size,
3648 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3649 mreg);
3651 break;
3652 case 3: {
3653 IROp op;
3654 if (Q) {
3655 switch (size) {
3656 case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
3657 case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
3658 case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
3659 case 3: return False;
3660 default: vassert(0);
3662 } else {
3663 switch (size) {
3664 case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
3665 case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
3666 case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
3667 case 3: return False;
3668 default: vassert(0);
3671 if (B == 0) {
3672 /* VCGT */
3673 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3674 DIP("vcgt.%c%d %c%u, %c%u, %c%u\n",
3675 U ? 'u' : 's', 8 << size,
3676 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3677 mreg);
3678 } else {
3679 /* VCGE */
3680 /* VCGE res, argn, argm
3681 is equal to
3682 VCGT tmp, argm, argn
3683 VNOT res, tmp */
3684 assign(res,
3685 unop(Q ? Iop_NotV128 : Iop_Not64,
3686 binop(op, mkexpr(arg_m), mkexpr(arg_n))));
3687 DIP("vcge.%c%d %c%u, %c%u, %c%u\n",
3688 U ? 'u' : 's', 8 << size,
3689 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3690 mreg);
3693 break;
3694 case 4:
3695 if (B == 0) {
3696 /* VSHL */
3697 IROp op = Iop_INVALID, sub_op = Iop_INVALID;
3698 IRTemp tmp = IRTemp_INVALID;
3699 if (U) {
3700 switch (size) {
3701 case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
3702 case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
3703 case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
3704 case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
3705 default: vassert(0);
3707 } else {
3708 tmp = newTemp(Q ? Ity_V128 : Ity_I64);
3709 switch (size) {
3710 case 0:
3711 op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3712 sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3713 break;
3714 case 1:
3715 op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3716 sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3717 break;
3718 case 2:
3719 op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3720 sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3721 break;
3722 case 3:
3723 op = Q ? Iop_Sar64x2 : Iop_Sar64;
3724 sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
3725 break;
3726 default:
3727 vassert(0);
3730 if (U) {
3731 if (!Q && (size == 3))
3732 assign(res, binop(op, mkexpr(arg_m),
3733 unop(Iop_64to8, mkexpr(arg_n))));
3734 else
3735 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3736 } else {
3737 if (Q)
3738 assign(tmp, binop(sub_op,
3739 binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
3740 mkexpr(arg_n)));
3741 else
3742 assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
3743 if (!Q && (size == 3))
3744 assign(res, binop(op, mkexpr(arg_m),
3745 unop(Iop_64to8, mkexpr(tmp))));
3746 else
3747 assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
3749 DIP("vshl.%c%d %c%u, %c%u, %c%u\n",
3750 U ? 'u' : 's', 8 << size,
3751 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3752 nreg);
3753 } else {
3754 /* VQSHL */
3755 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
3756 IRTemp tmp, shval, mask, old_shval;
3757 UInt i;
3758 ULong esize;
3759 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3760 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3761 if (U) {
3762 switch (size) {
3763 case 0:
3764 op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3765 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3766 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3767 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3768 break;
3769 case 1:
3770 op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3771 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3772 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3773 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3774 break;
3775 case 2:
3776 op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3777 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3778 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3779 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3780 break;
3781 case 3:
3782 op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3783 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3784 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3785 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3786 break;
3787 default:
3788 vassert(0);
3790 } else {
3791 switch (size) {
3792 case 0:
3793 op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
3794 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3795 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3796 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3797 break;
3798 case 1:
3799 op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
3800 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3801 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3802 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3803 break;
3804 case 2:
3805 op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
3806 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3807 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3808 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3809 break;
3810 case 3:
3811 op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
3812 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
3813 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3814 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3815 break;
3816 default:
3817 vassert(0);
3820 if (Q) {
3821 tmp = newTemp(Ity_V128);
3822 shval = newTemp(Ity_V128);
3823 mask = newTemp(Ity_V128);
3824 } else {
3825 tmp = newTemp(Ity_I64);
3826 shval = newTemp(Ity_I64);
3827 mask = newTemp(Ity_I64);
3829 assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3830 /* Only least significant byte from second argument is used.
3831 Copy this byte to the whole vector element. */
3832 assign(shval, binop(op_shrn,
3833 binop(op_shln,
3834 mkexpr(arg_n),
3835 mkU8((8 << size) - 8)),
3836 mkU8((8 << size) - 8)));
3837 for(i = 0; i < size; i++) {
3838 old_shval = shval;
3839 shval = newTemp(Q ? Ity_V128 : Ity_I64);
3840 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3841 mkexpr(old_shval),
3842 binop(op_shln,
3843 mkexpr(old_shval),
3844 mkU8(8 << i))));
3846 /* If shift is greater or equal to the element size and
3847 element is non-zero, then QC flag should be set. */
3848 esize = (8 << size) - 1;
3849 esize = (esize << 8) | esize;
3850 esize = (esize << 16) | esize;
3851 esize = (esize << 32) | esize;
3852 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3853 binop(cmp_gt, mkexpr(shval),
3854 Q ? mkU128(esize) : mkU64(esize)),
3855 unop(cmp_neq, mkexpr(arg_m))),
3856 Q ? mkU128(0) : mkU64(0),
3857 Q, condT);
 3858            /* Otherwise the QC flag should be set if the shift value is positive and
 3859               the result, right-shifted by the same value, is not equal to the left
 3860               argument. */
3861 assign(mask, binop(cmp_gt, mkexpr(shval),
3862 Q ? mkU128(0) : mkU64(0)));
3863 if (!Q && size == 3)
3864 assign(tmp, binop(op_rev, mkexpr(res),
3865 unop(Iop_64to8, mkexpr(arg_n))));
3866 else
3867 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3868 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3869 mkexpr(tmp), mkexpr(mask)),
3870 binop(Q ? Iop_AndV128 : Iop_And64,
3871 mkexpr(arg_m), mkexpr(mask)),
3872 Q, condT);
3873 DIP("vqshl.%c%d %c%u, %c%u, %c%u\n",
3874 U ? 'u' : 's', 8 << size,
3875 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3876 nreg);
3878 break;
3879 case 5:
3880 if (B == 0) {
3881 /* VRSHL */
3882 IROp op, op_shrn, op_shln, cmp_gt, op_add;
3883 IRTemp shval, old_shval, imm_val, round;
3884 UInt i;
3885 ULong imm;
3886 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3887 imm = 1L;
3888 switch (size) {
3889 case 0: imm = (imm << 8) | imm; /* fall through */
3890 case 1: imm = (imm << 16) | imm; /* fall through */
3891 case 2: imm = (imm << 32) | imm; /* fall through */
3892 case 3: break;
3893 default: vassert(0);
3895 imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3896 round = newTemp(Q ? Ity_V128 : Ity_I64);
3897 assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3898 if (U) {
3899 switch (size) {
3900 case 0:
3901 op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3902 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3903 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3904 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3905 break;
3906 case 1:
3907 op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3908 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3909 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3910 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3911 break;
3912 case 2:
3913 op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3914 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3915 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3916 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3917 break;
3918 case 3:
3919 op = Q ? Iop_Shl64x2 : Iop_Shl64;
3920 op_add = Q ? Iop_Add64x2 : Iop_Add64;
3921 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3922 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3923 break;
3924 default:
3925 vassert(0);
3927 } else {
3928 switch (size) {
3929 case 0:
3930 op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3931 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3932 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3933 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3934 break;
3935 case 1:
3936 op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3937 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3938 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3939 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3940 break;
3941 case 2:
3942 op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3943 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3944 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3945 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3946 break;
3947 case 3:
3948 op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3949 op_add = Q ? Iop_Add64x2 : Iop_Add64;
3950 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3951 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3952 break;
3953 default:
3954 vassert(0);
3957 if (Q) {
3958 shval = newTemp(Ity_V128);
3959 } else {
3960 shval = newTemp(Ity_I64);
3962 /* Only least significant byte from second argument is used.
3963 Copy this byte to the whole vector element. */
3964 assign(shval, binop(op_shrn,
3965 binop(op_shln,
3966 mkexpr(arg_n),
3967 mkU8((8 << size) - 8)),
3968 mkU8((8 << size) - 8)));
3969 for (i = 0; i < size; i++) {
3970 old_shval = shval;
3971 shval = newTemp(Q ? Ity_V128 : Ity_I64);
3972 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3973 mkexpr(old_shval),
3974 binop(op_shln,
3975 mkexpr(old_shval),
3976 mkU8(8 << i))));
3978 /* Compute the result */
3979 if (!Q && size == 3 && U) {
3980 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3981 binop(op,
3982 mkexpr(arg_m),
3983 unop(Iop_64to8,
3984 binop(op_add,
3985 mkexpr(arg_n),
3986 mkexpr(imm_val)))),
3987 binop(Q ? Iop_AndV128 : Iop_And64,
3988 mkexpr(imm_val),
3989 binop(cmp_gt,
3990 Q ? mkU128(0) : mkU64(0),
3991 mkexpr(arg_n)))));
3992 assign(res, binop(op_add,
3993 binop(op,
3994 mkexpr(arg_m),
3995 unop(Iop_64to8, mkexpr(arg_n))),
3996 mkexpr(round)));
3997 } else {
3998 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3999 binop(op,
4000 mkexpr(arg_m),
4001 binop(op_add,
4002 mkexpr(arg_n),
4003 mkexpr(imm_val))),
4004 binop(Q ? Iop_AndV128 : Iop_And64,
4005 mkexpr(imm_val),
4006 binop(cmp_gt,
4007 Q ? mkU128(0) : mkU64(0),
4008 mkexpr(arg_n)))));
4009 assign(res, binop(op_add,
4010 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4011 mkexpr(round)));
4013 DIP("vrshl.%c%d %c%u, %c%u, %c%u\n",
4014 U ? 'u' : 's', 8 << size,
4015 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4016 nreg);
4017 } else {
4018 /* VQRSHL */
4019 IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
4020 IRTemp tmp, shval, mask, old_shval, imm_val, round;
4021 UInt i;
4022 ULong esize, imm;
4023 cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
4024 cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
4025 imm = 1L;
4026 switch (size) {
4027 case 0: imm = (imm << 8) | imm; /* fall through */
4028 case 1: imm = (imm << 16) | imm; /* fall through */
4029 case 2: imm = (imm << 32) | imm; /* fall through */
4030 case 3: break;
4031 default: vassert(0);
4033 imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
4034 round = newTemp(Q ? Ity_V128 : Ity_I64);
4035 assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
4036 if (U) {
4037 switch (size) {
4038 case 0:
4039 op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
4040 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4041 op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
4042 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4043 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4044 break;
4045 case 1:
4046 op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
4047 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4048 op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
4049 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4050 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4051 break;
4052 case 2:
4053 op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
4054 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4055 op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
4056 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4057 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4058 break;
4059 case 3:
4060 op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
4061 op_add = Q ? Iop_Add64x2 : Iop_Add64;
4062 op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
4063 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4064 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4065 break;
4066 default:
4067 vassert(0);
4069 } else {
4070 switch (size) {
4071 case 0:
4072 op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
4073 op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4074 op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
4075 op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4076 op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4077 break;
4078 case 1:
4079 op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
4080 op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4081 op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
4082 op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4083 op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4084 break;
4085 case 2:
4086 op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
4087 op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4088 op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
4089 op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4090 op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4091 break;
4092 case 3:
4093 op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
4094 op_add = Q ? Iop_Add64x2 : Iop_Add64;
4095 op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
4096 op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4097 op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4098 break;
4099 default:
4100 vassert(0);
4103 if (Q) {
4104 tmp = newTemp(Ity_V128);
4105 shval = newTemp(Ity_V128);
4106 mask = newTemp(Ity_V128);
4107 } else {
4108 tmp = newTemp(Ity_I64);
4109 shval = newTemp(Ity_I64);
4110 mask = newTemp(Ity_I64);
4112 /* Only least significant byte from second argument is used.
4113 Copy this byte to the whole vector element. */
4114 assign(shval, binop(op_shrn,
4115 binop(op_shln,
4116 mkexpr(arg_n),
4117 mkU8((8 << size) - 8)),
4118 mkU8((8 << size) - 8)));
4119 for (i = 0; i < size; i++) {
4120 old_shval = shval;
4121 shval = newTemp(Q ? Ity_V128 : Ity_I64);
4122 assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
4123 mkexpr(old_shval),
4124 binop(op_shln,
4125 mkexpr(old_shval),
4126 mkU8(8 << i))));
4128 /* Compute the result */
4129 assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4130 binop(op,
4131 mkexpr(arg_m),
4132 binop(op_add,
4133 mkexpr(arg_n),
4134 mkexpr(imm_val))),
4135 binop(Q ? Iop_AndV128 : Iop_And64,
4136 mkexpr(imm_val),
4137 binop(cmp_gt,
4138 Q ? mkU128(0) : mkU64(0),
4139 mkexpr(arg_n)))));
4140 assign(res, binop(op_add,
4141 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4142 mkexpr(round)));
4143 /* If shift is greater or equal to the element size and element is
4144 non-zero, then QC flag should be set. */
4145 esize = (8 << size) - 1;
4146 esize = (esize << 8) | esize;
4147 esize = (esize << 16) | esize;
4148 esize = (esize << 32) | esize;
4149 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4150 binop(cmp_gt, mkexpr(shval),
4151 Q ? mkU128(esize) : mkU64(esize)),
4152 unop(cmp_neq, mkexpr(arg_m))),
4153 Q ? mkU128(0) : mkU64(0),
4154 Q, condT);
 4155            /* Otherwise the QC flag should be set if the shift value is positive and
 4156               the result, right-shifted by the same value, is not equal to the left
 4157               argument. */
4158 assign(mask, binop(cmp_gt, mkexpr(shval),
4159 Q ? mkU128(0) : mkU64(0)));
4160 if (!Q && size == 3)
4161 assign(tmp, binop(op_rev, mkexpr(res),
4162 unop(Iop_64to8, mkexpr(arg_n))));
4163 else
4164 assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
4165 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4166 mkexpr(tmp), mkexpr(mask)),
4167 binop(Q ? Iop_AndV128 : Iop_And64,
4168 mkexpr(arg_m), mkexpr(mask)),
4169 Q, condT);
4170 DIP("vqrshl.%c%d %c%u, %c%u, %c%u\n",
4171 U ? 'u' : 's', 8 << size,
4172 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4173 nreg);
4175 break;
4176 case 6:
4177 /* VMAX, VMIN */
4178 if (B == 0) {
4179 /* VMAX */
4180 IROp op;
4181 if (U == 0) {
4182 switch (size) {
4183 case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
4184 case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
4185 case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
4186 case 3: return False;
4187 default: vassert(0);
4189 } else {
4190 switch (size) {
4191 case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
4192 case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
4193 case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
4194 case 3: return False;
4195 default: vassert(0);
4198 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4199 DIP("vmax.%c%d %c%u, %c%u, %c%u\n",
4200 U ? 'u' : 's', 8 << size,
4201 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4202 mreg);
4203 } else {
4204 /* VMIN */
4205 IROp op;
4206 if (U == 0) {
4207 switch (size) {
4208 case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
4209 case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
4210 case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
4211 case 3: return False;
4212 default: vassert(0);
4214 } else {
4215 switch (size) {
4216 case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
4217 case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
4218 case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
4219 case 3: return False;
4220 default: vassert(0);
4223 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4224 DIP("vmin.%c%d %c%u, %c%u, %c%u\n",
4225 U ? 'u' : 's', 8 << size,
4226 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4227 mreg);
4229 break;
4230 case 7:
4231 if (B == 0) {
4232 /* VABD */
4233 IROp op_cmp, op_sub;
4234 IRTemp cond;
4235 if ((theInstr >> 23) & 1) {
4236 vpanic("VABDL should not be in dis_neon_data_3same\n");
4238 if (Q) {
4239 switch (size) {
4240 case 0:
4241 op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4242 op_sub = Iop_Sub8x16;
4243 break;
4244 case 1:
4245 op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4246 op_sub = Iop_Sub16x8;
4247 break;
4248 case 2:
4249 op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4250 op_sub = Iop_Sub32x4;
4251 break;
4252 case 3:
4253 return False;
4254 default:
4255 vassert(0);
4257 } else {
4258 switch (size) {
4259 case 0:
4260 op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4261 op_sub = Iop_Sub8x8;
4262 break;
4263 case 1:
4264 op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4265 op_sub = Iop_Sub16x4;
4266 break;
4267 case 2:
4268 op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4269 op_sub = Iop_Sub32x2;
4270 break;
4271 case 3:
4272 return False;
4273 default:
4274 vassert(0);
4277 if (Q) {
4278 cond = newTemp(Ity_V128);
4279 } else {
4280 cond = newTemp(Ity_I64);
4282 assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4283 assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
4284 binop(Q ? Iop_AndV128 : Iop_And64,
4285 binop(op_sub, mkexpr(arg_n),
4286 mkexpr(arg_m)),
4287 mkexpr(cond)),
4288 binop(Q ? Iop_AndV128 : Iop_And64,
4289 binop(op_sub, mkexpr(arg_m),
4290 mkexpr(arg_n)),
4291 unop(Q ? Iop_NotV128 : Iop_Not64,
4292 mkexpr(cond)))));
4293 DIP("vabd.%c%d %c%u, %c%u, %c%u\n",
4294 U ? 'u' : 's', 8 << size,
4295 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4296 mreg);
4297 } else {
4298 /* VABA */
4299 IROp op_cmp, op_sub, op_add;
4300 IRTemp cond, acc, tmp;
4301 if ((theInstr >> 23) & 1) {
4302 vpanic("VABAL should not be in dis_neon_data_3same");
4304 if (Q) {
4305 switch (size) {
4306 case 0:
4307 op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4308 op_sub = Iop_Sub8x16;
4309 op_add = Iop_Add8x16;
4310 break;
4311 case 1:
4312 op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4313 op_sub = Iop_Sub16x8;
4314 op_add = Iop_Add16x8;
4315 break;
4316 case 2:
4317 op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4318 op_sub = Iop_Sub32x4;
4319 op_add = Iop_Add32x4;
4320 break;
4321 case 3:
4322 return False;
4323 default:
4324 vassert(0);
4326 } else {
4327 switch (size) {
4328 case 0:
4329 op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4330 op_sub = Iop_Sub8x8;
4331 op_add = Iop_Add8x8;
4332 break;
4333 case 1:
4334 op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4335 op_sub = Iop_Sub16x4;
4336 op_add = Iop_Add16x4;
4337 break;
4338 case 2:
4339 op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4340 op_sub = Iop_Sub32x2;
4341 op_add = Iop_Add32x2;
4342 break;
4343 case 3:
4344 return False;
4345 default:
4346 vassert(0);
4349 if (Q) {
4350 cond = newTemp(Ity_V128);
4351 acc = newTemp(Ity_V128);
4352 tmp = newTemp(Ity_V128);
4353 assign(acc, getQReg(dreg));
4354 } else {
4355 cond = newTemp(Ity_I64);
4356 acc = newTemp(Ity_I64);
4357 tmp = newTemp(Ity_I64);
4358 assign(acc, getDRegI64(dreg));
4360 assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4361 assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
4362 binop(Q ? Iop_AndV128 : Iop_And64,
4363 binop(op_sub, mkexpr(arg_n),
4364 mkexpr(arg_m)),
4365 mkexpr(cond)),
4366 binop(Q ? Iop_AndV128 : Iop_And64,
4367 binop(op_sub, mkexpr(arg_m),
4368 mkexpr(arg_n)),
4369 unop(Q ? Iop_NotV128 : Iop_Not64,
4370 mkexpr(cond)))));
4371 assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
4372 DIP("vaba.%c%d %c%u, %c%u, %c%u\n",
4373 U ? 'u' : 's', 8 << size,
4374 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4375 mreg);
4377 break;
4378 case 8:
4379 if (B == 0) {
4380 IROp op;
4381 if (U == 0) {
4382 /* VADD */
4383 switch (size) {
4384 case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
4385 case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
4386 case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
4387 case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
4388 default: vassert(0);
4390 DIP("vadd.i%d %c%u, %c%u, %c%u\n",
4391 8 << size, Q ? 'q' : 'd',
4392 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4393 } else {
4394 /* VSUB */
4395 switch (size) {
4396 case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
4397 case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
4398 case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
4399 case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
4400 default: vassert(0);
4402 DIP("vsub.i%d %c%u, %c%u, %c%u\n",
4403 8 << size, Q ? 'q' : 'd',
4404 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4406 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4407 } else {
4408 IROp op;
4409 switch (size) {
4410 case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
4411 case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
4412 case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
4413 case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
4414 default: vassert(0);
4416 if (U == 0) {
4417 /* VTST */
4418 assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
4419 mkexpr(arg_n),
4420 mkexpr(arg_m))));
4421 DIP("vtst.%d %c%u, %c%u, %c%u\n",
4422 8 << size, Q ? 'q' : 'd',
4423 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4424 } else {
4425 /* VCEQ */
4426 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
4427 unop(op,
4428 binop(Q ? Iop_XorV128 : Iop_Xor64,
4429 mkexpr(arg_n),
4430 mkexpr(arg_m)))));
4431 DIP("vceq.i%d %c%u, %c%u, %c%u\n",
4432 8 << size, Q ? 'q' : 'd',
4433 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4436 break;
4437 case 9:
4438 if (B == 0) {
4439 /* VMLA, VMLS (integer) */
4440 IROp op, op2;
4441 UInt P = (theInstr >> 24) & 1;
4442 if (P) {
4443 switch (size) {
4444 case 0:
4445 op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4446 op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
4447 break;
4448 case 1:
4449 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4450 op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
4451 break;
4452 case 2:
4453 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4454 op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
4455 break;
4456 case 3:
4457 return False;
4458 default:
4459 vassert(0);
4461 } else {
4462 switch (size) {
4463 case 0:
4464 op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4465 op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
4466 break;
4467 case 1:
4468 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4469 op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
4470 break;
4471 case 2:
4472 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4473 op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
4474 break;
4475 case 3:
4476 return False;
4477 default:
4478 vassert(0);
4481 assign(res, binop(op2,
4482 Q ? getQReg(dreg) : getDRegI64(dreg),
4483 binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4484 DIP("vml%c.i%d %c%u, %c%u, %c%u\n",
4485 P ? 's' : 'a', 8 << size,
4486 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4487 mreg);
4488 } else {
4489 /* VMUL */
4490 IROp op;
4491 UInt P = (theInstr >> 24) & 1;
4492 if (P) {
4493 switch (size) {
4494 case 0:
4495 op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
4496 break;
4497 case 1: case 2: case 3: return False;
4498 default: vassert(0);
4500 } else {
4501 switch (size) {
4502 case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
4503 case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
4504 case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
4505 case 3: return False;
4506 default: vassert(0);
4509 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4510 DIP("vmul.%c%d %c%u, %c%u, %c%u\n",
4511 P ? 'p' : 'i', 8 << size,
4512 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4513 mreg);
4515 break;
4516 case 10: {
4517 /* VPMAX, VPMIN */
4518 UInt P = (theInstr >> 4) & 1;
4519 IROp op;
4520 if (Q)
4521 return False;
4522 if (P) {
4523 switch (size) {
4524 case 0: op = U ? Iop_PwMin8Ux8 : Iop_PwMin8Sx8; break;
4525 case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
4526 case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
4527 case 3: return False;
4528 default: vassert(0);
4530 } else {
4531 switch (size) {
4532 case 0: op = U ? Iop_PwMax8Ux8 : Iop_PwMax8Sx8; break;
4533 case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
4534 case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
4535 case 3: return False;
4536 default: vassert(0);
4539 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4540 DIP("vp%s.%c%d %c%u, %c%u, %c%u\n",
4541 P ? "min" : "max", U ? 'u' : 's',
4542 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4543 Q ? 'q' : 'd', mreg);
4544 break;
4546 case 11:
4547 if (B == 0) {
4548 if (U == 0) {
4549 /* VQDMULH */
4550 IROp op ,op2;
4551 ULong imm;
4552 switch (size) {
4553 case 0: case 3:
4554 return False;
4555 case 1:
4556 op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
4557 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4558 imm = 1LL << 15;
4559 imm = (imm << 16) | imm;
4560 imm = (imm << 32) | imm;
4561 break;
4562 case 2:
4563 op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
4564 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4565 imm = 1LL << 31;
4566 imm = (imm << 32) | imm;
4567 break;
4568 default:
4569 vassert(0);
4571 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4572 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4573 binop(op2, mkexpr(arg_n),
4574 Q ? mkU128(imm) : mkU64(imm)),
4575 binop(op2, mkexpr(arg_m),
4576 Q ? mkU128(imm) : mkU64(imm))),
4577 Q ? mkU128(0) : mkU64(0),
4578 Q, condT);
4579 DIP("vqdmulh.s%d %c%u, %c%u, %c%u\n",
4580 8 << size, Q ? 'q' : 'd',
4581 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4582 } else {
4583 /* VQRDMULH */
4584 IROp op ,op2;
4585 ULong imm;
4586 switch(size) {
4587 case 0: case 3:
4588 return False;
4589 case 1:
4590 imm = 1LL << 15;
4591 imm = (imm << 16) | imm;
4592 imm = (imm << 32) | imm;
4593 op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
4594 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4595 break;
4596 case 2:
4597 imm = 1LL << 31;
4598 imm = (imm << 32) | imm;
4599 op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
4600 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4601 break;
4602 default:
4603 vassert(0);
4605 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4606 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4607 binop(op2, mkexpr(arg_n),
4608 Q ? mkU128(imm) : mkU64(imm)),
4609 binop(op2, mkexpr(arg_m),
4610 Q ? mkU128(imm) : mkU64(imm))),
4611 Q ? mkU128(0) : mkU64(0),
4612 Q, condT);
4613 DIP("vqrdmulh.s%d %c%u, %c%u, %c%u\n",
4614 8 << size, Q ? 'q' : 'd',
4615 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4617 } else {
4618 if (U == 0) {
4619 /* VPADD */
4620 IROp op;
4621 if (Q)
4622 return False;
4623 switch (size) {
4624 case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8; break;
4625 case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
4626 case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
4627 case 3: return False;
4628 default: vassert(0);
4630 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4631 DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
4632 8 << size, Q ? 'q' : 'd',
4633 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4634 } else {
4635 return False;
4638 break;
4639 case 12: {
4640 return False;
4642 /* Starting from here these are FP SIMD cases */
4643 case 13:
4644 if (B == 0) {
4645 IROp op;
4646 if (U == 0) {
4647 if ((C >> 1) == 0) {
4648 /* VADD */
4649 op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
4650 DIP("vadd.f32 %c%u, %c%u, %c%u\n",
4651 Q ? 'q' : 'd', dreg,
4652 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4653 } else {
4654 /* VSUB */
4655 op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
4656 DIP("vsub.f32 %c%u, %c%u, %c%u\n",
4657 Q ? 'q' : 'd', dreg,
4658 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4660 } else {
4661 if ((C >> 1) == 0) {
4662 /* VPADD */
4663 if (Q)
4664 return False;
4665 op = Iop_PwAdd32Fx2;
4666 DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4667 } else {
4668 /* VABD */
4669 if (Q) {
4670 assign(res, unop(Iop_Abs32Fx4,
4671 triop(Iop_Sub32Fx4,
4672 get_FAKE_roundingmode(),
4673 mkexpr(arg_n),
4674 mkexpr(arg_m))));
4675 } else {
4676 assign(res, unop(Iop_Abs32Fx2,
4677 binop(Iop_Sub32Fx2,
4678 mkexpr(arg_n),
4679 mkexpr(arg_m))));
4681 DIP("vabd.f32 %c%u, %c%u, %c%u\n",
4682 Q ? 'q' : 'd', dreg,
4683 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4684 break;
4687 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4688 } else {
4689 if (U == 0) {
4690 /* VMLA, VMLS */
4691 IROp op, op2;
4692 UInt P = (theInstr >> 21) & 1;
4693 if (P) {
4694 switch (size & 1) {
4695 case 0:
4696 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4697 op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
4698 break;
4699 case 1: return False;
4700 default: vassert(0);
4702 } else {
4703 switch (size & 1) {
4704 case 0:
4705 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4706 op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
4707 break;
4708 case 1: return False;
4709 default: vassert(0);
4712 assign(res, binop_w_fake_RM(
4713 op2,
4714 Q ? getQReg(dreg) : getDRegI64(dreg),
4715 binop_w_fake_RM(op, mkexpr(arg_n),
4716 mkexpr(arg_m))));
4718 DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
4719 P ? 's' : 'a', Q ? 'q' : 'd',
4720 dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4721 } else {
4722 /* VMUL */
4723 IROp op;
4724 if ((C >> 1) != 0)
4725 return False;
4726 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
4727 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4728 DIP("vmul.f32 %c%u, %c%u, %c%u\n",
4729 Q ? 'q' : 'd', dreg,
4730 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4733 break;
4734 case 14:
4735 if (B == 0) {
4736 if (U == 0) {
4737 if ((C >> 1) == 0) {
4738 /* VCEQ */
4739 IROp op;
4740 if ((theInstr >> 20) & 1)
4741 return False;
4742 op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
4743 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4744 DIP("vceq.f32 %c%u, %c%u, %c%u\n",
4745 Q ? 'q' : 'd', dreg,
4746 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4747 } else {
4748 return False;
4750 } else {
4751 if ((C >> 1) == 0) {
4752 /* VCGE */
4753 IROp op;
4754 if ((theInstr >> 20) & 1)
4755 return False;
4756 op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4757 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4758 DIP("vcge.f32 %c%u, %c%u, %c%u\n",
4759 Q ? 'q' : 'd', dreg,
4760 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4761 } else {
4762 /* VCGT */
4763 IROp op;
4764 if ((theInstr >> 20) & 1)
4765 return False;
4766 op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4767 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4768 DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
4769 Q ? 'q' : 'd', dreg,
4770 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4773 } else {
4774 if (U == 1) {
4775 /* VACGE, VACGT */
4776 UInt op_bit = (theInstr >> 21) & 1;
4777 IROp op, op2;
4778 op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
4779 if (op_bit) {
4780 op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4781 assign(res, binop(op,
4782 unop(op2, mkexpr(arg_n)),
4783 unop(op2, mkexpr(arg_m))));
4784 } else {
4785 op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4786 assign(res, binop(op,
4787 unop(op2, mkexpr(arg_n)),
4788 unop(op2, mkexpr(arg_m))));
4790 DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
4791 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4792 Q ? 'q' : 'd', mreg);
4793 } else {
4794 return False;
4797 break;
4798 case 15:
4799 if (B == 0) {
4800 if (U == 0) {
4801 /* VMAX, VMIN */
4802 IROp op;
4803 if ((theInstr >> 20) & 1)
4804 return False;
4805 if ((theInstr >> 21) & 1) {
4806 op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
4807 DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4808 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4809 } else {
4810 op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
4811 DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4812 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4814 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4815 } else {
4816 /* VPMAX, VPMIN */
4817 IROp op;
4818 if (Q)
4819 return False;
4820 if ((theInstr >> 20) & 1)
4821 return False;
4822 if ((theInstr >> 21) & 1) {
4823 op = Iop_PwMin32Fx2;
4824 DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4825 } else {
4826 op = Iop_PwMax32Fx2;
4827 DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4829 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4831 } else {
4832 if (U == 0) {
4833 if ((C >> 1) == 0) {
4834 /* VRECPS */
4835 if ((theInstr >> 20) & 1)
4836 return False;
4837 assign(res, binop(Q ? Iop_RecipStep32Fx4
4838 : Iop_RecipStep32Fx2,
4839 mkexpr(arg_n),
4840 mkexpr(arg_m)));
4841 DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4842 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4843 } else {
4844 /* VRSQRTS */
4845 if ((theInstr >> 20) & 1)
4846 return False;
4847 assign(res, binop(Q ? Iop_RSqrtStep32Fx4
4848 : Iop_RSqrtStep32Fx2,
4849 mkexpr(arg_n),
4850 mkexpr(arg_m)));
4851 DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4852 Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4854 } else {
4855 return False;
4858 break;
4859 default:
4860 /*NOTREACHED*/
4861 vassert(0);
4864 if (Q) {
4865 putQReg(dreg, mkexpr(res), condT);
4866 } else {
4867 putDRegI64(dreg, mkexpr(res), condT);
4870 return True;
4873 /* A7.4.2 Three registers of different length */
4874 static
4875 Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
4877 /* In paths where this returns False, indicating a non-decodable
4878 instruction, there may still be some IR assignments to temporaries
4879 generated. This is inconvenient but harmless, and the post-front-end
4880 IR optimisation pass will just remove them anyway. So there's no
4881 effort made here to tidy it up.
4883 UInt A = (theInstr >> 8) & 0xf;
4884 UInt B = (theInstr >> 20) & 3;
4885 UInt U = (theInstr >> 24) & 1;
4886 UInt P = (theInstr >> 9) & 1;
4887 UInt mreg = get_neon_m_regno(theInstr);
4888 UInt nreg = get_neon_n_regno(theInstr);
4889 UInt dreg = get_neon_d_regno(theInstr);
4890 UInt size = B;
4891 ULong imm;
4892 IRTemp res, arg_m, arg_n, cond, tmp;
4893 IROp cvt, cvt2, cmp, op, op2, sh, add;
4894 switch (A) {
4895 case 0: case 1: case 2: case 3:
4896 /* VADDL, VADDW, VSUBL, VSUBW */
4897 if (dreg & 1)
4898 return False;
4899 dreg >>= 1;
4900 size = B;
4901 switch (size) {
4902 case 0:
4903 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4904 op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
4905 break;
4906 case 1:
4907 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4908 op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
4909 break;
4910 case 2:
4911 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4912 op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
4913 break;
4914 case 3:
4915 return False;
4916 default:
4917 vassert(0);
4919 arg_n = newTemp(Ity_V128);
4920 arg_m = newTemp(Ity_V128);
4921 if (A & 1) {
4922 if (nreg & 1)
4923 return False;
4924 nreg >>= 1;
4925 assign(arg_n, getQReg(nreg));
4926 } else {
4927 assign(arg_n, unop(cvt, getDRegI64(nreg)));
4929 assign(arg_m, unop(cvt, getDRegI64(mreg)));
4930 putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4931 condT);
4932 DIP("v%s%c.%c%d q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
4933 (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
4934 (A & 1) ? 'q' : 'd', nreg, mreg);
4935 return True;
4936 case 4:
4937 /* VADDHN, VRADDHN */
4938 if (mreg & 1)
4939 return False;
4940 mreg >>= 1;
4941 if (nreg & 1)
4942 return False;
4943 nreg >>= 1;
4944 size = B;
4945 switch (size) {
4946 case 0:
4947 op = Iop_Add16x8;
4948 cvt = Iop_NarrowUn16to8x8;
4949 sh = Iop_ShrN16x8;
4950 imm = 1U << 7;
4951 imm = (imm << 16) | imm;
4952 imm = (imm << 32) | imm;
4953 break;
4954 case 1:
4955 op = Iop_Add32x4;
4956 cvt = Iop_NarrowUn32to16x4;
4957 sh = Iop_ShrN32x4;
4958 imm = 1U << 15;
4959 imm = (imm << 32) | imm;
4960 break;
4961 case 2:
4962 op = Iop_Add64x2;
4963 cvt = Iop_NarrowUn64to32x2;
4964 sh = Iop_ShrN64x2;
4965 imm = 1U << 31;
4966 break;
4967 case 3:
4968 return False;
4969 default:
4970 vassert(0);
4972 tmp = newTemp(Ity_V128);
4973 res = newTemp(Ity_V128);
4974 assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
4975 if (U) {
4976 /* VRADDHN */
4977 assign(res, binop(op, mkexpr(tmp),
4978 binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
4979 } else {
4980 assign(res, mkexpr(tmp));
4982 putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
4983 condT);
4984 DIP("v%saddhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
4985 nreg, mreg);
4986 return True;
4987 case 5:
4988 /* VABAL */
4989 if (!((theInstr >> 23) & 1)) {
4990 vpanic("VABA should not be in dis_neon_data_3diff\n");
4992 if (dreg & 1)
4993 return False;
4994 dreg >>= 1;
4995 switch (size) {
4996 case 0:
4997 cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4998 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4999 cvt2 = Iop_Widen8Sto16x8;
5000 op = Iop_Sub16x8;
5001 op2 = Iop_Add16x8;
5002 break;
5003 case 1:
5004 cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5005 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5006 cvt2 = Iop_Widen16Sto32x4;
5007 op = Iop_Sub32x4;
5008 op2 = Iop_Add32x4;
5009 break;
5010 case 2:
5011 cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5012 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5013 cvt2 = Iop_Widen32Sto64x2;
5014 op = Iop_Sub64x2;
5015 op2 = Iop_Add64x2;
5016 break;
5017 case 3:
5018 return False;
5019 default:
5020 vassert(0);
5022 arg_n = newTemp(Ity_V128);
5023 arg_m = newTemp(Ity_V128);
5024 cond = newTemp(Ity_V128);
5025 res = newTemp(Ity_V128);
5026 assign(arg_n, unop(cvt, getDRegI64(nreg)));
5027 assign(arg_m, unop(cvt, getDRegI64(mreg)));
5028 assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5029 getDRegI64(mreg))));
5030 assign(res, binop(op2,
5031 binop(Iop_OrV128,
5032 binop(Iop_AndV128,
5033 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5034 mkexpr(cond)),
5035 binop(Iop_AndV128,
5036 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5037 unop(Iop_NotV128, mkexpr(cond)))),
5038 getQReg(dreg)));
5039 putQReg(dreg, mkexpr(res), condT);
5040 DIP("vabal.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5041 nreg, mreg);
5042 return True;
5043 case 6:
5044 /* VSUBHN, VRSUBHN */
5045 if (mreg & 1)
5046 return False;
5047 mreg >>= 1;
5048 if (nreg & 1)
5049 return False;
5050 nreg >>= 1;
5051 size = B;
5052 switch (size) {
5053 case 0:
5054 op = Iop_Sub16x8;
5055 op2 = Iop_Add16x8;
5056 cvt = Iop_NarrowUn16to8x8;
5057 sh = Iop_ShrN16x8;
5058 imm = 1U << 7;
5059 imm = (imm << 16) | imm;
5060 imm = (imm << 32) | imm;
5061 break;
5062 case 1:
5063 op = Iop_Sub32x4;
5064 op2 = Iop_Add32x4;
5065 cvt = Iop_NarrowUn32to16x4;
5066 sh = Iop_ShrN32x4;
5067 imm = 1U << 15;
5068 imm = (imm << 32) | imm;
5069 break;
5070 case 2:
5071 op = Iop_Sub64x2;
5072 op2 = Iop_Add64x2;
5073 cvt = Iop_NarrowUn64to32x2;
5074 sh = Iop_ShrN64x2;
5075 imm = 1U << 31;
5076 break;
5077 case 3:
5078 return False;
5079 default:
5080 vassert(0);
5082 tmp = newTemp(Ity_V128);
5083 res = newTemp(Ity_V128);
5084 assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
5085 if (U) {
5086 /* VRSUBHN */
5087 assign(res, binop(op2, mkexpr(tmp),
5088 binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
5089 } else {
5090 assign(res, mkexpr(tmp));
5092 putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
5093 condT);
5094 DIP("v%ssubhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
5095 nreg, mreg);
5096 return True;
5097 case 7:
5098 /* VABDL */
5099 if (!((theInstr >> 23) & 1)) {
5100 vpanic("VABL should not be in dis_neon_data_3diff\n");
5102 if (dreg & 1)
5103 return False;
5104 dreg >>= 1;
5105 switch (size) {
5106 case 0:
5107 cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5108 cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5109 cvt2 = Iop_Widen8Sto16x8;
5110 op = Iop_Sub16x8;
5111 break;
5112 case 1:
5113 cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5114 cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5115 cvt2 = Iop_Widen16Sto32x4;
5116 op = Iop_Sub32x4;
5117 break;
5118 case 2:
5119 cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5120 cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5121 cvt2 = Iop_Widen32Sto64x2;
5122 op = Iop_Sub64x2;
5123 break;
5124 case 3:
5125 return False;
5126 default:
5127 vassert(0);
5129 arg_n = newTemp(Ity_V128);
5130 arg_m = newTemp(Ity_V128);
5131 cond = newTemp(Ity_V128);
5132 res = newTemp(Ity_V128);
5133 assign(arg_n, unop(cvt, getDRegI64(nreg)));
5134 assign(arg_m, unop(cvt, getDRegI64(mreg)));
5135 assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5136 getDRegI64(mreg))));
5137 assign(res, binop(Iop_OrV128,
5138 binop(Iop_AndV128,
5139 binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5140 mkexpr(cond)),
5141 binop(Iop_AndV128,
5142 binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5143 unop(Iop_NotV128, mkexpr(cond)))));
5144 putQReg(dreg, mkexpr(res), condT);
5145 DIP("vabdl.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5146 nreg, mreg);
5147 return True;
5148 case 8:
5149 case 10:
5150 /* VMLAL, VMLSL (integer) */
5151 if (dreg & 1)
5152 return False;
5153 dreg >>= 1;
5154 size = B;
5155 switch (size) {
5156 case 0:
5157 op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5158 op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
5159 break;
5160 case 1:
5161 op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5162 op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5163 break;
5164 case 2:
5165 op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5166 op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5167 break;
5168 case 3:
5169 return False;
5170 default:
5171 vassert(0);
5173 res = newTemp(Ity_V128);
5174 assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
5175 putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5176 DIP("vml%cl.%c%d q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
5177 8 << size, dreg, nreg, mreg);
5178 return True;
5179 case 9:
5180 case 11:
5181 /* VQDMLAL, VQDMLSL */
5182 if (U)
5183 return False;
5184 if (dreg & 1)
5185 return False;
5186 dreg >>= 1;
5187 size = B;
5188 switch (size) {
5189 case 0: case 3:
5190 return False;
5191 case 1:
5192 op = Iop_QDMull16Sx4;
5193 cmp = Iop_CmpEQ16x4;
5194 add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5195 op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5196 imm = 1LL << 15;
5197 imm = (imm << 16) | imm;
5198 imm = (imm << 32) | imm;
5199 break;
5200 case 2:
5201 op = Iop_QDMull32Sx2;
5202 cmp = Iop_CmpEQ32x2;
5203 add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5204 op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5205 imm = 1LL << 31;
5206 imm = (imm << 32) | imm;
5207 break;
5208 default:
5209 vassert(0);
5211 res = newTemp(Ity_V128);
5212 tmp = newTemp(Ity_V128);
5213 assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
5214 assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5215 setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5216 True, condT);
5217 setFlag_QC(binop(Iop_And64,
5218 binop(cmp, getDRegI64(nreg), mkU64(imm)),
5219 binop(cmp, getDRegI64(mreg), mkU64(imm))),
5220 mkU64(0),
5221 False, condT);
5222 putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5223 DIP("vqdml%cl.s%d q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
5224 nreg, mreg);
5225 return True;
5226 case 12:
5227 case 14:
5228 /* VMULL (integer or polynomial) */
5229 if (dreg & 1)
5230 return False;
5231 dreg >>= 1;
5232 size = B;
5233 switch (size) {
5234 case 0:
5235 op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5236 if (P)
5237 op = Iop_PolynomialMull8x8;
5238 break;
5239 case 1:
5240 if (P) return False;
5241 op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5242 break;
5243 case 2:
5244 if (P) return False;
5245 op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5246 break;
5247 case 3:
5248 return False;
5249 default:
5250 vassert(0);
5252 putQReg(dreg, binop(op, getDRegI64(nreg),
5253 getDRegI64(mreg)), condT);
5254 DIP("vmull.%c%d q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
5255 8 << size, dreg, nreg, mreg);
5256 return True;
5257 case 13:
5258 /* VQDMULL */
5259 if (U)
5260 return False;
5261 if (dreg & 1)
5262 return False;
5263 dreg >>= 1;
5264 size = B;
5265 switch (size) {
5266 case 0:
5267 case 3:
5268 return False;
5269 case 1:
5270 op = Iop_QDMull16Sx4;
5271 op2 = Iop_CmpEQ16x4;
5272 imm = 1LL << 15;
5273 imm = (imm << 16) | imm;
5274 imm = (imm << 32) | imm;
5275 break;
5276 case 2:
5277 op = Iop_QDMull32Sx2;
5278 op2 = Iop_CmpEQ32x2;
5279 imm = 1LL << 31;
5280 imm = (imm << 32) | imm;
5281 break;
5282 default:
5283 vassert(0);
5285 putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
5286 condT);
5287 setFlag_QC(binop(Iop_And64,
5288 binop(op2, getDRegI64(nreg), mkU64(imm)),
5289 binop(op2, getDRegI64(mreg), mkU64(imm))),
5290 mkU64(0),
5291 False, condT);
5292 DIP("vqdmull.s%d q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
5293 return True;
5294 default:
5295 return False;
5297 return False;
5300 /* A7.4.3 Two registers and a scalar */
5301 static
5302 Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
5304 # define INSN(_bMax,_bMin) SLICE_UInt(theInstr, (_bMax), (_bMin))
5305 UInt U = INSN(24,24);
5306 UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
5307 UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
5308 UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
5309 UInt size = INSN(21,20);
5310 UInt index;
5311 UInt Q = INSN(24,24);
5313 if (INSN(27,25) != 1 || INSN(23,23) != 1
5314 || INSN(6,6) != 1 || INSN(4,4) != 0)
5315 return False;
5317 /* VMLA, VMLS (scalar) */
5318 if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
5319 IRTemp res, arg_m, arg_n;
5320 IROp dup, get, op, op2, add, sub;
5321 if (Q) {
5322 if ((dreg & 1) || (nreg & 1))
5323 return False;
5324 dreg >>= 1;
5325 nreg >>= 1;
5326 res = newTemp(Ity_V128);
5327 arg_m = newTemp(Ity_V128);
5328 arg_n = newTemp(Ity_V128);
5329 assign(arg_n, getQReg(nreg));
5330 switch(size) {
5331 case 1:
5332 dup = Iop_Dup16x8;
5333 get = Iop_GetElem16x4;
5334 index = mreg >> 3;
5335 mreg &= 7;
5336 break;
5337 case 2:
5338 dup = Iop_Dup32x4;
5339 get = Iop_GetElem32x2;
5340 index = mreg >> 4;
5341 mreg &= 0xf;
5342 break;
5343 case 0:
5344 case 3:
5345 return False;
5346 default:
5347 vassert(0);
5349 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5350 } else {
5351 res = newTemp(Ity_I64);
5352 arg_m = newTemp(Ity_I64);
5353 arg_n = newTemp(Ity_I64);
5354 assign(arg_n, getDRegI64(nreg));
5355 switch(size) {
5356 case 1:
5357 dup = Iop_Dup16x4;
5358 get = Iop_GetElem16x4;
5359 index = mreg >> 3;
5360 mreg &= 7;
5361 break;
5362 case 2:
5363 dup = Iop_Dup32x2;
5364 get = Iop_GetElem32x2;
5365 index = mreg >> 4;
5366 mreg &= 0xf;
5367 break;
5368 case 0:
5369 case 3:
5370 return False;
5371 default:
5372 vassert(0);
5374 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5376 if (INSN(8,8)) {
5377 switch (size) {
5378 case 2:
5379 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5380 add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
5381 sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
5382 break;
5383 case 0:
5384 case 1:
5385 case 3:
5386 return False;
5387 default:
5388 vassert(0);
5390 } else {
5391 switch (size) {
5392 case 1:
5393 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5394 add = Q ? Iop_Add16x8 : Iop_Add16x4;
5395 sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
5396 break;
5397 case 2:
5398 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5399 add = Q ? Iop_Add32x4 : Iop_Add32x2;
5400 sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
5401 break;
5402 case 0:
5403 case 3:
5404 return False;
5405 default:
5406 vassert(0);
5409 op2 = INSN(10,10) ? sub : add;
5410 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5411 if (Q)
5412 putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
5413 condT);
5414 else
5415 putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
5416 condT);
5417 DIP("vml%c.%c%d %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
5418 INSN(8,8) ? 'f' : 'i', 8 << size,
5419 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
5420 return True;
5423 /* VMLAL, VMLSL (scalar) */
5424 if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
5425 IRTemp res, arg_m, arg_n;
5426 IROp dup, get, op, op2, add, sub;
5427 if (dreg & 1)
5428 return False;
5429 dreg >>= 1;
5430 res = newTemp(Ity_V128);
5431 arg_m = newTemp(Ity_I64);
5432 arg_n = newTemp(Ity_I64);
5433 assign(arg_n, getDRegI64(nreg));
5434 switch(size) {
5435 case 1:
5436 dup = Iop_Dup16x4;
5437 get = Iop_GetElem16x4;
5438 index = mreg >> 3;
5439 mreg &= 7;
5440 break;
5441 case 2:
5442 dup = Iop_Dup32x2;
5443 get = Iop_GetElem32x2;
5444 index = mreg >> 4;
5445 mreg &= 0xf;
5446 break;
5447 case 0:
5448 case 3:
5449 return False;
5450 default:
5451 vassert(0);
5453 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5454 switch (size) {
5455 case 1:
5456 op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5457 add = Iop_Add32x4;
5458 sub = Iop_Sub32x4;
5459 break;
5460 case 2:
5461 op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5462 add = Iop_Add64x2;
5463 sub = Iop_Sub64x2;
5464 break;
5465 case 0:
5466 case 3:
5467 return False;
5468 default:
5469 vassert(0);
5471 op2 = INSN(10,10) ? sub : add;
5472 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5473 putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5474 DIP("vml%cl.%c%d q%u, d%u, d%u[%u]\n",
5475 INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
5476 8 << size, dreg, nreg, mreg, index);
5477 return True;
5480 /* VQDMLAL, VQDMLSL (scalar) */
5481 if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
5482 IRTemp res, arg_m, arg_n, tmp;
5483 IROp dup, get, op, op2, add, cmp;
5484 UInt P = INSN(10,10);
5485 ULong imm;
5486 if (dreg & 1)
5487 return False;
5488 dreg >>= 1;
5489 res = newTemp(Ity_V128);
5490 arg_m = newTemp(Ity_I64);
5491 arg_n = newTemp(Ity_I64);
5492 assign(arg_n, getDRegI64(nreg));
5493 switch(size) {
5494 case 1:
5495 dup = Iop_Dup16x4;
5496 get = Iop_GetElem16x4;
5497 index = mreg >> 3;
5498 mreg &= 7;
5499 break;
5500 case 2:
5501 dup = Iop_Dup32x2;
5502 get = Iop_GetElem32x2;
5503 index = mreg >> 4;
5504 mreg &= 0xf;
5505 break;
5506 case 0:
5507 case 3:
5508 return False;
5509 default:
5510 vassert(0);
5512 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5513 switch (size) {
5514 case 0:
5515 case 3:
5516 return False;
5517 case 1:
5518 op = Iop_QDMull16Sx4;
5519 cmp = Iop_CmpEQ16x4;
5520 add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5521 op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5522 imm = 1LL << 15;
5523 imm = (imm << 16) | imm;
5524 imm = (imm << 32) | imm;
5525 break;
5526 case 2:
5527 op = Iop_QDMull32Sx2;
5528 cmp = Iop_CmpEQ32x2;
5529 add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5530 op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5531 imm = 1LL << 31;
5532 imm = (imm << 32) | imm;
5533 break;
5534 default:
5535 vassert(0);
5537 res = newTemp(Ity_V128);
5538 tmp = newTemp(Ity_V128);
5539 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5540 assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5541 setFlag_QC(binop(Iop_And64,
5542 binop(cmp, mkexpr(arg_n), mkU64(imm)),
5543 binop(cmp, mkexpr(arg_m), mkU64(imm))),
5544 mkU64(0),
5545 False, condT);
5546 setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5547 True, condT);
5548 putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5549 DIP("vqdml%cl.s%d q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
5550 dreg, nreg, mreg, index);
5551 return True;
5554 /* VMUL (by scalar) */
5555 if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
5556 IRTemp res, arg_m, arg_n;
5557 IROp dup, get, op;
5558 if (Q) {
5559 if ((dreg & 1) || (nreg & 1))
5560 return False;
5561 dreg >>= 1;
5562 nreg >>= 1;
5563 res = newTemp(Ity_V128);
5564 arg_m = newTemp(Ity_V128);
5565 arg_n = newTemp(Ity_V128);
5566 assign(arg_n, getQReg(nreg));
5567 switch(size) {
5568 case 1:
5569 dup = Iop_Dup16x8;
5570 get = Iop_GetElem16x4;
5571 index = mreg >> 3;
5572 mreg &= 7;
5573 break;
5574 case 2:
5575 dup = Iop_Dup32x4;
5576 get = Iop_GetElem32x2;
5577 index = mreg >> 4;
5578 mreg &= 0xf;
5579 break;
5580 case 0:
5581 case 3:
5582 return False;
5583 default:
5584 vassert(0);
5586 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5587 } else {
5588 res = newTemp(Ity_I64);
5589 arg_m = newTemp(Ity_I64);
5590 arg_n = newTemp(Ity_I64);
5591 assign(arg_n, getDRegI64(nreg));
5592 switch(size) {
5593 case 1:
5594 dup = Iop_Dup16x4;
5595 get = Iop_GetElem16x4;
5596 index = mreg >> 3;
5597 mreg &= 7;
5598 break;
5599 case 2:
5600 dup = Iop_Dup32x2;
5601 get = Iop_GetElem32x2;
5602 index = mreg >> 4;
5603 mreg &= 0xf;
5604 break;
5605 case 0:
5606 case 3:
5607 return False;
5608 default:
5609 vassert(0);
5611 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5613 if (INSN(8,8)) {
5614 switch (size) {
5615 case 2:
5616 op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5617 break;
5618 case 0:
5619 case 1:
5620 case 3:
5621 return False;
5622 default:
5623 vassert(0);
5625 } else {
5626 switch (size) {
5627 case 1:
5628 op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5629 break;
5630 case 2:
5631 op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5632 break;
5633 case 0:
5634 case 3:
5635 return False;
5636 default:
5637 vassert(0);
5640 assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5641 if (Q)
5642 putQReg(dreg, mkexpr(res), condT);
5643 else
5644 putDRegI64(dreg, mkexpr(res), condT);
5645 DIP("vmul.%c%d %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
5646 8 << size, Q ? 'q' : 'd', dreg,
5647 Q ? 'q' : 'd', nreg, mreg, index);
5648 return True;
5651 /* VMULL (scalar) */
5652 if (INSN(11,8) == BITS4(1,0,1,0)) {
5653 IRTemp res, arg_m, arg_n;
5654 IROp dup, get, op;
5655 if (dreg & 1)
5656 return False;
5657 dreg >>= 1;
5658 res = newTemp(Ity_V128);
5659 arg_m = newTemp(Ity_I64);
5660 arg_n = newTemp(Ity_I64);
5661 assign(arg_n, getDRegI64(nreg));
5662 switch(size) {
5663 case 1:
5664 dup = Iop_Dup16x4;
5665 get = Iop_GetElem16x4;
5666 index = mreg >> 3;
5667 mreg &= 7;
5668 break;
5669 case 2:
5670 dup = Iop_Dup32x2;
5671 get = Iop_GetElem32x2;
5672 index = mreg >> 4;
5673 mreg &= 0xf;
5674 break;
5675 case 0:
5676 case 3:
5677 return False;
5678 default:
5679 vassert(0);
5681 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5682 switch (size) {
5683 case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
5684 case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
5685 case 0: case 3: return False;
5686 default: vassert(0);
5688 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5689 putQReg(dreg, mkexpr(res), condT);
5690 DIP("vmull.%c%d q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
5691 nreg, mreg, index);
5692 return True;
5695 /* VQDMULL */
5696 if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
5697 IROp op ,op2, dup, get;
5698 ULong imm;
5699 IRTemp arg_m, arg_n;
5700 if (dreg & 1)
5701 return False;
5702 dreg >>= 1;
5703 arg_m = newTemp(Ity_I64);
5704 arg_n = newTemp(Ity_I64);
5705 assign(arg_n, getDRegI64(nreg));
5706 switch(size) {
5707 case 1:
5708 dup = Iop_Dup16x4;
5709 get = Iop_GetElem16x4;
5710 index = mreg >> 3;
5711 mreg &= 7;
5712 break;
5713 case 2:
5714 dup = Iop_Dup32x2;
5715 get = Iop_GetElem32x2;
5716 index = mreg >> 4;
5717 mreg &= 0xf;
5718 break;
5719 case 0:
5720 case 3:
5721 return False;
5722 default:
5723 vassert(0);
5725 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5726 switch (size) {
5727 case 0:
5728 case 3:
5729 return False;
5730 case 1:
5731 op = Iop_QDMull16Sx4;
5732 op2 = Iop_CmpEQ16x4;
5733 imm = 1LL << 15;
5734 imm = (imm << 16) | imm;
5735 imm = (imm << 32) | imm;
5736 break;
5737 case 2:
5738 op = Iop_QDMull32Sx2;
5739 op2 = Iop_CmpEQ32x2;
5740 imm = 1LL << 31;
5741 imm = (imm << 32) | imm;
5742 break;
5743 default:
5744 vassert(0);
5746 putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5747 condT);
5748 setFlag_QC(binop(Iop_And64,
5749 binop(op2, mkexpr(arg_n), mkU64(imm)),
5750 binop(op2, mkexpr(arg_m), mkU64(imm))),
5751 mkU64(0),
5752 False, condT);
5753 DIP("vqdmull.s%d q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
5754 index);
5755 return True;
5758 /* VQDMULH */
5759 if (INSN(11,8) == BITS4(1,1,0,0)) {
5760 IROp op ,op2, dup, get;
5761 ULong imm;
5762 IRTemp res, arg_m, arg_n;
5763 if (Q) {
5764 if ((dreg & 1) || (nreg & 1))
5765 return False;
5766 dreg >>= 1;
5767 nreg >>= 1;
5768 res = newTemp(Ity_V128);
5769 arg_m = newTemp(Ity_V128);
5770 arg_n = newTemp(Ity_V128);
5771 assign(arg_n, getQReg(nreg));
5772 switch(size) {
5773 case 1:
5774 dup = Iop_Dup16x8;
5775 get = Iop_GetElem16x4;
5776 index = mreg >> 3;
5777 mreg &= 7;
5778 break;
5779 case 2:
5780 dup = Iop_Dup32x4;
5781 get = Iop_GetElem32x2;
5782 index = mreg >> 4;
5783 mreg &= 0xf;
5784 break;
5785 case 0:
5786 case 3:
5787 return False;
5788 default:
5789 vassert(0);
5791 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5792 } else {
5793 res = newTemp(Ity_I64);
5794 arg_m = newTemp(Ity_I64);
5795 arg_n = newTemp(Ity_I64);
5796 assign(arg_n, getDRegI64(nreg));
5797 switch(size) {
5798 case 1:
5799 dup = Iop_Dup16x4;
5800 get = Iop_GetElem16x4;
5801 index = mreg >> 3;
5802 mreg &= 7;
5803 break;
5804 case 2:
5805 dup = Iop_Dup32x2;
5806 get = Iop_GetElem32x2;
5807 index = mreg >> 4;
5808 mreg &= 0xf;
5809 break;
5810 case 0:
5811 case 3:
5812 return False;
5813 default:
5814 vassert(0);
5816 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5818 switch (size) {
5819 case 0:
5820 case 3:
5821 return False;
5822 case 1:
5823 op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
5824 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5825 imm = 1LL << 15;
5826 imm = (imm << 16) | imm;
5827 imm = (imm << 32) | imm;
5828 break;
5829 case 2:
5830 op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
5831 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5832 imm = 1LL << 31;
5833 imm = (imm << 32) | imm;
5834 break;
5835 default:
5836 vassert(0);
5838 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5839 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5840 binop(op2, mkexpr(arg_n),
5841 Q ? mkU128(imm) : mkU64(imm)),
5842 binop(op2, mkexpr(arg_m),
5843 Q ? mkU128(imm) : mkU64(imm))),
5844 Q ? mkU128(0) : mkU64(0),
5845 Q, condT);
5846 if (Q)
5847 putQReg(dreg, mkexpr(res), condT);
5848 else
5849 putDRegI64(dreg, mkexpr(res), condT);
5850 DIP("vqdmulh.s%d %c%u, %c%u, d%u[%u]\n",
5851 8 << size, Q ? 'q' : 'd', dreg,
5852 Q ? 'q' : 'd', nreg, mreg, index);
5853 return True;
5856 /* VQRDMULH (scalar) */
5857 if (INSN(11,8) == BITS4(1,1,0,1)) {
5858 IROp op ,op2, dup, get;
5859 ULong imm;
5860 IRTemp res, arg_m, arg_n;
5861 if (Q) {
5862 if ((dreg & 1) || (nreg & 1))
5863 return False;
5864 dreg >>= 1;
5865 nreg >>= 1;
5866 res = newTemp(Ity_V128);
5867 arg_m = newTemp(Ity_V128);
5868 arg_n = newTemp(Ity_V128);
5869 assign(arg_n, getQReg(nreg));
5870 switch(size) {
5871 case 1:
5872 dup = Iop_Dup16x8;
5873 get = Iop_GetElem16x4;
5874 index = mreg >> 3;
5875 mreg &= 7;
5876 break;
5877 case 2:
5878 dup = Iop_Dup32x4;
5879 get = Iop_GetElem32x2;
5880 index = mreg >> 4;
5881 mreg &= 0xf;
5882 break;
5883 case 0:
5884 case 3:
5885 return False;
5886 default:
5887 vassert(0);
5889 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5890 } else {
5891 res = newTemp(Ity_I64);
5892 arg_m = newTemp(Ity_I64);
5893 arg_n = newTemp(Ity_I64);
5894 assign(arg_n, getDRegI64(nreg));
5895 switch(size) {
5896 case 1:
5897 dup = Iop_Dup16x4;
5898 get = Iop_GetElem16x4;
5899 index = mreg >> 3;
5900 mreg &= 7;
5901 break;
5902 case 2:
5903 dup = Iop_Dup32x2;
5904 get = Iop_GetElem32x2;
5905 index = mreg >> 4;
5906 mreg &= 0xf;
5907 break;
5908 case 0:
5909 case 3:
5910 return False;
5911 default:
5912 vassert(0);
5914 assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5916 switch (size) {
5917 case 0:
5918 case 3:
5919 return False;
5920 case 1:
5921 op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
5922 op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5923 imm = 1LL << 15;
5924 imm = (imm << 16) | imm;
5925 imm = (imm << 32) | imm;
5926 break;
5927 case 2:
5928 op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
5929 op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5930 imm = 1LL << 31;
5931 imm = (imm << 32) | imm;
5932 break;
5933 default:
5934 vassert(0);
5936 assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5937 setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5938 binop(op2, mkexpr(arg_n),
5939 Q ? mkU128(imm) : mkU64(imm)),
5940 binop(op2, mkexpr(arg_m),
5941 Q ? mkU128(imm) : mkU64(imm))),
5942 Q ? mkU128(0) : mkU64(0),
5943 Q, condT);
5944 if (Q)
5945 putQReg(dreg, mkexpr(res), condT);
5946 else
5947 putDRegI64(dreg, mkexpr(res), condT);
5948 DIP("vqrdmulh.s%d %c%u, %c%u, d%u[%u]\n",
5949 8 << size, Q ? 'q' : 'd', dreg,
5950 Q ? 'q' : 'd', nreg, mreg, index);
5951 return True;
5954 return False;
5955 # undef INSN
/* A7.4.4 Two registers and a shift amount */
/* Decode and translate a NEON "two registers and a shift amount"
   instruction (ARM ARM section A7.4.4): VSHR, VSRA, VRSHR, VRSRA,
   VSRI, VSLI, VSHL (imm), VQSHL(U), VSHRN, VRSHRN, VQSHRN(U),
   VQRSHRN(U), VSHLL/VMOVL and VCVT fp<->fixed.

   theInstr is the 32-bit instruction word; condT is the guarding
   condition temp (IRTemp_INVALID when unconditional) threaded through
   to the put*/setFlag_QC helpers.  Returns True iff the instruction
   was recognised and IR was emitted; False means "not decoded here". */
static
Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
{
   UInt A = (theInstr >> 8) & 0xf;    /* opcode field, selects operation */
   UInt B = (theInstr >> 6) & 1;
   UInt L = (theInstr >> 7) & 1;
   UInt U = (theInstr >> 24) & 1;     /* unsigned (vs signed) variant */
   UInt Q = B;                        /* B doubles as the Q (128-bit) bit */
   UInt imm6 = (theInstr >> 16) & 0x3f;
   UInt shift_imm;
   UInt size = 4;                     /* 4 == "not yet determined" */
   UInt tmp;                          /* NOTE(review): reused both as a plain
                                         scratch UInt and (in the VQSHL case
                                         below) as an IRTemp; this relies on
                                         IRTemp being a typedef of UInt. */
   UInt mreg = get_neon_m_regno(theInstr);
   UInt dreg = get_neon_d_regno(theInstr);
   ULong imm = 0;
   IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
   IRTemp reg_m, res, mask;

   if (L == 0 && ((theInstr >> 19) & 7) == 0)
      /* It is one reg and immediate */
      return False;

   /* The position of the most significant set bit in {L,imm6} encodes
      the element size; the shift amount is then recovered from imm6. */
   tmp = (L << 6) | imm6;
   if (tmp & 0x40) {
      size = 3;
      shift_imm = 64 - imm6;
   } else if (tmp & 0x20) {
      size = 2;
      shift_imm = 64 - imm6;
   } else if (tmp & 0x10) {
      size = 1;
      shift_imm = 32 - imm6;
   } else if (tmp & 0x8) {
      size = 0;
      shift_imm = 16 - imm6;
   } else {
      return False;
   }

   switch (A) {
      case 3:
      case 2:
         /* VRSHR, VRSRA */
         if (shift_imm > 0) {
            IRExpr *imm_val;
            /* Build a lane-replicated constant with bit 0 of each lane
               set; ANDing it with (m >> (shift_imm-1)) extracts the last
               bit shifted out, which implements round-to-nearest. */
            imm = 1L;
            switch (size) {
               case 0:
                  imm = (imm << 8) | imm;
                  /* fall through */
               case 1:
                  imm = (imm << 16) | imm;
                  /* fall through */
               case 2:
                  imm = (imm << 32) | imm;
                  /* fall through */
               case 3:
                  break;
               default:
                  vassert(0);
            }
            if (Q) {
               reg_m = newTemp(Ity_V128);
               res = newTemp(Ity_V128);
               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
               assign(reg_m, getQReg(mreg));
               switch (size) {
                  case 0:
                     add = Iop_Add8x16;
                     op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
                     break;
                  case 1:
                     add = Iop_Add16x8;
                     op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
                     break;
                  case 2:
                     add = Iop_Add32x4;
                     op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
                     break;
                  case 3:
                     add = Iop_Add64x2;
                     op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
                     break;
                  default:
                     vassert(0);
               }
            } else {
               reg_m = newTemp(Ity_I64);
               res = newTemp(Ity_I64);
               imm_val = mkU64(imm);
               assign(reg_m, getDRegI64(mreg));
               switch (size) {
                  case 0:
                     add = Iop_Add8x8;
                     op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
                     break;
                  case 1:
                     add = Iop_Add16x4;
                     op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
                     break;
                  case 2:
                     add = Iop_Add32x2;
                     op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
                     break;
                  case 3:
                     add = Iop_Add64;
                     op = U ? Iop_Shr64 : Iop_Sar64;
                     break;
                  default:
                     vassert(0);
               }
            }
            /* res = (m >> shift_imm) + ((m >> (shift_imm-1)) & 1)
               per lane: shift with rounding. */
            assign(res,
                   binop(add,
                         binop(op,
                               mkexpr(reg_m),
                               mkU8(shift_imm)),
                         binop(Q ? Iop_AndV128 : Iop_And64,
                               binop(op,
                                     mkexpr(reg_m),
                                     mkU8(shift_imm - 1)),
                               imm_val)));
         } else {
            /* shift by 0: result is just the source operand */
            if (Q) {
               res = newTemp(Ity_V128);
               assign(res, getQReg(mreg));
            } else {
               res = newTemp(Ity_I64);
               assign(res, getDRegI64(mreg));
            }
         }
         if (A == 3) {
            /* VRSRA additionally accumulates into the destination */
            if (Q) {
               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
                       condT);
            } else {
               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
                          condT);
            }
            DIP("vrsra.%c%d %c%u, %c%u, #%u\n",
                U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
         } else {
            if (Q) {
               putQReg(dreg, mkexpr(res), condT);
            } else {
               putDRegI64(dreg, mkexpr(res), condT);
            }
            DIP("vrshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
         }
         return True;
      case 1:
      case 0:
         /* VSHR, VSRA */
         if (Q) {
            reg_m = newTemp(Ity_V128);
            assign(reg_m, getQReg(mreg));
            res = newTemp(Ity_V128);
         } else {
            reg_m = newTemp(Ity_I64);
            assign(reg_m, getDRegI64(mreg));
            res = newTemp(Ity_I64);
         }
         if (Q) {
            switch (size) {
               case 0:
                  op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
                  add = Iop_Add8x16;
                  break;
               case 1:
                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
                  add = Iop_Add16x8;
                  break;
               case 2:
                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
                  add = Iop_Add32x4;
                  break;
               case 3:
                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
                  add = Iop_Add64x2;
                  break;
               default:
                  vassert(0);
            }
         } else {
            switch (size) {
               case 0:
                  op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
                  add = Iop_Add8x8;
                  break;
               case 1:
                  op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
                  add = Iop_Add16x4;
                  break;
               case 2:
                  op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
                  add = Iop_Add32x2;
                  break;
               case 3:
                  op = U ? Iop_Shr64 : Iop_Sar64;
                  add = Iop_Add64;
                  break;
               default:
                  vassert(0);
            }
         }
         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
         if (A == 1) {
            /* VSRA accumulates into the destination */
            if (Q) {
               putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
                       condT);
            } else {
               putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
                          condT);
            }
            DIP("vsra.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
         } else {
            if (Q) {
               putQReg(dreg, mkexpr(res), condT);
            } else {
               putDRegI64(dreg, mkexpr(res), condT);
            }
            DIP("vshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
         }
         return True;
      case 4:
         /* VSRI */
         /* Shift-right-and-insert: only the bits covered by the shifted
            mask come from the (shifted) source; the destination keeps
            the remaining high bits. */
         if (!U)
            return False;
         if (Q) {
            res = newTemp(Ity_V128);
            mask = newTemp(Ity_V128);
         } else {
            res = newTemp(Ity_I64);
            mask = newTemp(Ity_I64);
         }
         switch (size) {
            case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
            case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
            case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
            case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
            default: vassert(0);
         }
         if (Q) {
            assign(mask, binop(op, binop(Iop_64HLtoV128,
                                         mkU64(0xFFFFFFFFFFFFFFFFLL),
                                         mkU64(0xFFFFFFFFFFFFFFFFLL)),
                               mkU8(shift_imm)));
            assign(res, binop(Iop_OrV128,
                              binop(Iop_AndV128,
                                    getQReg(dreg),
                                    unop(Iop_NotV128,
                                         mkexpr(mask))),
                              binop(op,
                                    getQReg(mreg),
                                    mkU8(shift_imm))));
            putQReg(dreg, mkexpr(res), condT);
         } else {
            assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
                               mkU8(shift_imm)));
            assign(res, binop(Iop_Or64,
                              binop(Iop_And64,
                                    getDRegI64(dreg),
                                    unop(Iop_Not64,
                                         mkexpr(mask))),
                              binop(op,
                                    getDRegI64(mreg),
                                    mkU8(shift_imm))));
            putDRegI64(dreg, mkexpr(res), condT);
         }
         DIP("vsri.%d %c%u, %c%u, #%u\n",
             8 << size, Q ? 'q' : 'd', dreg,
             Q ? 'q' : 'd', mreg, shift_imm);
         return True;
      case 5:
         if (U) {
            /* VSLI */
            /* Shift-left-and-insert: mirror image of VSRI; the
               destination keeps the low bits not covered by the
               left-shifted mask. */
            shift_imm = 8 * (1 << size) - shift_imm;  /* left shifts encode inverted */
            if (Q) {
               res = newTemp(Ity_V128);
               mask = newTemp(Ity_V128);
            } else {
               res = newTemp(Ity_I64);
               mask = newTemp(Ity_I64);
            }
            switch (size) {
               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
               default: vassert(0);
            }
            if (Q) {
               assign(mask, binop(op, binop(Iop_64HLtoV128,
                                            mkU64(0xFFFFFFFFFFFFFFFFLL),
                                            mkU64(0xFFFFFFFFFFFFFFFFLL)),
                                  mkU8(shift_imm)));
               assign(res, binop(Iop_OrV128,
                                 binop(Iop_AndV128,
                                       getQReg(dreg),
                                       unop(Iop_NotV128,
                                            mkexpr(mask))),
                                 binop(op,
                                       getQReg(mreg),
                                       mkU8(shift_imm))));
               putQReg(dreg, mkexpr(res), condT);
            } else {
               assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
                                  mkU8(shift_imm)));
               assign(res, binop(Iop_Or64,
                                 binop(Iop_And64,
                                       getDRegI64(dreg),
                                       unop(Iop_Not64,
                                            mkexpr(mask))),
                                 binop(op,
                                       getDRegI64(mreg),
                                       mkU8(shift_imm))));
               putDRegI64(dreg, mkexpr(res), condT);
            }
            DIP("vsli.%d %c%u, %c%u, #%u\n",
                8 << size, Q ? 'q' : 'd', dreg,
                Q ? 'q' : 'd', mreg, shift_imm);
            return True;
         } else {
            /* VSHL #imm */
            shift_imm = 8 * (1 << size) - shift_imm;  /* left shifts encode inverted */
            if (Q) {
               res = newTemp(Ity_V128);
            } else {
               res = newTemp(Ity_I64);
            }
            switch (size) {
               case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
               case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
               case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
               case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
               default: vassert(0);
            }
            assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
                              mkU8(shift_imm)));
            if (Q) {
               putQReg(dreg, mkexpr(res), condT);
            } else {
               putDRegI64(dreg, mkexpr(res), condT);
            }
            DIP("vshl.i%d %c%u, %c%u, #%u\n",
                8 << size, Q ? 'q' : 'd', dreg,
                Q ? 'q' : 'd', mreg, shift_imm);
            return True;
         }
         break;
      case 6:
      case 7:
         /* VQSHL, VQSHLU */
         shift_imm = 8 * (1 << size) - shift_imm;  /* left shifts encode inverted */
         if (U) {
            if (A & 1) {
               /* VQSHL.Uxx: unsigned saturating shift; op_rev is the
                  matching right shift used below to detect saturation. */
               switch (size) {
                  case 0:
                     op = Q ? Iop_QShlNsatUU8x16 : Iop_QShlNsatUU8x8;
                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
                     break;
                  case 1:
                     op = Q ? Iop_QShlNsatUU16x8 : Iop_QShlNsatUU16x4;
                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
                     break;
                  case 2:
                     op = Q ? Iop_QShlNsatUU32x4 : Iop_QShlNsatUU32x2;
                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
                     break;
                  case 3:
                     op = Q ? Iop_QShlNsatUU64x2 : Iop_QShlNsatUU64x1;
                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
                     break;
                  default:
                     vassert(0);
               }
               DIP("vqshl.u%d %c%u, %c%u, #%u\n",
                   8 << size,
                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
            } else {
               /* VQSHLU: signed source, unsigned saturated result */
               switch (size) {
                  case 0:
                     op = Q ? Iop_QShlNsatSU8x16 : Iop_QShlNsatSU8x8;
                     op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
                     break;
                  case 1:
                     op = Q ? Iop_QShlNsatSU16x8 : Iop_QShlNsatSU16x4;
                     op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
                     break;
                  case 2:
                     op = Q ? Iop_QShlNsatSU32x4 : Iop_QShlNsatSU32x2;
                     op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
                     break;
                  case 3:
                     op = Q ? Iop_QShlNsatSU64x2 : Iop_QShlNsatSU64x1;
                     op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
                     break;
                  default:
                     vassert(0);
               }
               DIP("vqshlu.s%d %c%u, %c%u, #%u\n",
                   8 << size,
                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
            }
         } else {
            if (!(A & 1))
               return False;
            /* VQSHL.Sxx: signed saturating shift */
            switch (size) {
               case 0:
                  op = Q ? Iop_QShlNsatSS8x16 : Iop_QShlNsatSS8x8;
                  op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
                  break;
               case 1:
                  op = Q ? Iop_QShlNsatSS16x8 : Iop_QShlNsatSS16x4;
                  op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
                  break;
               case 2:
                  op = Q ? Iop_QShlNsatSS32x4 : Iop_QShlNsatSS32x2;
                  op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
                  break;
               case 3:
                  op = Q ? Iop_QShlNsatSS64x2 : Iop_QShlNsatSS64x1;
                  op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
                  break;
               default:
                  vassert(0);
            }
            DIP("vqshl.s%d %c%u, %c%u, #%u\n",
                8 << size,
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
         }
         /* NB: from here on 'tmp' is used as an IRTemp (see declaration) */
         if (Q) {
            tmp = newTemp(Ity_V128);
            res = newTemp(Ity_V128);
            reg_m = newTemp(Ity_V128);
            assign(reg_m, getQReg(mreg));
         } else {
            tmp = newTemp(Ity_I64);
            res = newTemp(Ity_I64);
            reg_m = newTemp(Ity_I64);
            assign(reg_m, getDRegI64(mreg));
         }
         assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
         /* Shift the result back; if that differs from the source,
            saturation occurred, so set the QC (sticky saturation) flag. */
         assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
         setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
         if (Q)
            putQReg(dreg, mkexpr(res), condT);
         else
            putDRegI64(dreg, mkexpr(res), condT);
         return True;
      case 8:
         if (!U) {
            if (L == 1)
               return False;
            size++;
            /* Narrowing ops: D:Vd is the 64-bit dest, M:Vm must name an
               even D register (a Q register source). */
            dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
            mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
            if (mreg & 1)
               return False;
            mreg >>= 1;
            if (!B) {
               /* VSHRN*/
               IROp narOp;
               reg_m = newTemp(Ity_V128);
               assign(reg_m, getQReg(mreg));
               res = newTemp(Ity_I64);
               switch (size) {
                  case 1:
                     op = Iop_ShrN16x8;
                     narOp = Iop_NarrowUn16to8x8;
                     break;
                  case 2:
                     op = Iop_ShrN32x4;
                     narOp = Iop_NarrowUn32to16x4;
                     break;
                  case 3:
                     op = Iop_ShrN64x2;
                     narOp = Iop_NarrowUn64to32x2;
                     break;
                  default:
                     vassert(0);
               }
               assign(res, unop(narOp,
                                binop(op,
                                      mkexpr(reg_m),
                                      mkU8(shift_imm))));
               putDRegI64(dreg, mkexpr(res), condT);
               DIP("vshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
                   shift_imm);
               return True;
            } else {
               /* VRSHRN */
               IROp addOp, shOp, narOp;
               IRExpr *imm_val;
               reg_m = newTemp(Ity_V128);
               assign(reg_m, getQReg(mreg));
               res = newTemp(Ity_I64);
               /* per-lane LSB constant for the rounding correction,
                  as in VRSHR above */
               imm = 1L;
               switch (size) {
                  case 0: imm = (imm << 8) | imm; /* fall through */
                  case 1: imm = (imm << 16) | imm; /* fall through */
                  case 2: imm = (imm << 32) | imm; /* fall through */
                  case 3: break;
                  default: vassert(0);
               }
               imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
               switch (size) {
                  case 1:
                     addOp = Iop_Add16x8;
                     shOp = Iop_ShrN16x8;
                     narOp = Iop_NarrowUn16to8x8;
                     break;
                  case 2:
                     addOp = Iop_Add32x4;
                     shOp = Iop_ShrN32x4;
                     narOp = Iop_NarrowUn32to16x4;
                     break;
                  case 3:
                     addOp = Iop_Add64x2;
                     shOp = Iop_ShrN64x2;
                     narOp = Iop_NarrowUn64to32x2;
                     break;
                  default:
                     vassert(0);
               }
               /* rounded shift, then narrow each lane to half width */
               assign(res, unop(narOp,
                                binop(addOp,
                                      binop(shOp,
                                            mkexpr(reg_m),
                                            mkU8(shift_imm)),
                                      binop(Iop_AndV128,
                                            binop(shOp,
                                                  mkexpr(reg_m),
                                                  mkU8(shift_imm - 1)),
                                            imm_val))));
               putDRegI64(dreg, mkexpr(res), condT);
               if (shift_imm == 0) {
                  DIP("vmov%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
                      shift_imm);
               } else {
                  DIP("vrshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
                      shift_imm);
               }
               return True;
            }
         }
         /* else fall through */
      case 9:
         /* VQSHRN, VQSHRUN, VQRSHRN, VQRSHRUN: saturating (rounded if B)
            shift-right-narrow.  cvt does the saturating narrow; cvt2
            widens back so the QC flag can be set on any change. */
         dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
         mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
         if (mreg & 1)
            return False;
         mreg >>= 1;
         size++;
         if ((theInstr >> 8) & 1) {
            switch (size) {
               case 1:
                  op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
                  cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
                  cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
                  break;
               case 2:
                  op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
                  cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
                  cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
                  break;
               case 3:
                  op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
                  cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
                  cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
                  break;
               default:
                  vassert(0);
            }
            DIP("vq%sshrn.%c%d d%u, q%u, #%u\n", B ? "r" : "",
                U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
         } else {
            /* "UN" forms: signed input, unsigned saturated output */
            vassert(U);
            switch (size) {
               case 1:
                  op = Iop_SarN16x8;
                  cvt = Iop_QNarrowUn16Sto8Ux8;
                  cvt2 = Iop_Widen8Uto16x8;
                  break;
               case 2:
                  op = Iop_SarN32x4;
                  cvt = Iop_QNarrowUn32Sto16Ux4;
                  cvt2 = Iop_Widen16Uto32x4;
                  break;
               case 3:
                  op = Iop_SarN64x2;
                  cvt = Iop_QNarrowUn64Sto32Ux2;
                  cvt2 = Iop_Widen32Uto64x2;
                  break;
               default:
                  vassert(0);
            }
            DIP("vq%sshrun.s%d d%u, q%u, #%u\n", B ? "r" : "",
                8 << size, dreg, mreg, shift_imm);
         }
         if (B) {
            if (shift_imm > 0) {
               /* rounding constant and lane-wise add, as for VRSHRN */
               imm = 1;
               switch (size) {
                  case 1: imm = (imm << 16) | imm; /* fall through */
                  case 2: imm = (imm << 32) | imm; /* fall through */
                  case 3: break;
                  case 0: default: vassert(0);
               }
               switch (size) {
                  case 1: add = Iop_Add16x8; break;
                  case 2: add = Iop_Add32x4; break;
                  case 3: add = Iop_Add64x2; break;
                  case 0: default: vassert(0);
               }
            }
         }
         reg_m = newTemp(Ity_V128);
         res = newTemp(Ity_V128);
         assign(reg_m, getQReg(mreg));
         if (B) {
            /* VQRSHRN, VQRSHRUN */
            assign(res, binop(add,
                              binop(op, mkexpr(reg_m), mkU8(shift_imm)),
                              binop(Iop_AndV128,
                                    binop(op,
                                          mkexpr(reg_m),
                                          mkU8(shift_imm - 1)),
                                    mkU128(imm))));
         } else {
            /* VQSHRN, VQSHRUN */
            assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
         }
         /* QC is set if narrow-then-widen does not round-trip,
            i.e. the narrowing saturated */
         setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
                    True, condT);
         putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
         return True;
      case 10:
         /* VSHLL
            VMOVL ::= VSHLL #0 */
         if (B)
            return False;
         if (dreg & 1)
            return False;
         dreg >>= 1;
         shift_imm = (8 << size) - shift_imm;  /* left shift encodes inverted */
         res = newTemp(Ity_V128);
         switch (size) {
            case 0:
               op = Iop_ShlN16x8;
               cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
               break;
            case 1:
               op = Iop_ShlN32x4;
               cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
               break;
            case 2:
               op = Iop_ShlN64x2;
               cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
               break;
            case 3:
               return False;
            default:
               vassert(0);
         }
         /* widen each lane first, then shift left */
         assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
         putQReg(dreg, mkexpr(res), condT);
         if (shift_imm == 0) {
            DIP("vmovl.%c%d q%u, d%u\n", U ? 'u' : 's', 8 << size,
                dreg, mreg);
         } else {
            DIP("vshll.%c%d q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
                dreg, mreg, shift_imm);
         }
         return True;
      case 14:
      case 15:
         /* VCVT floating-point <-> fixed-point */
         /* fixed-point fraction bits = 64 - imm6 */
         if ((theInstr >> 8) & 1) {
            if (U) {
               op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
            } else {
               op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
            }
            DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
                64 - ((theInstr >> 16) & 0x3f));
         } else {
            if (U) {
               op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
            } else {
               op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
            }
            DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
                Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
                64 - ((theInstr >> 16) & 0x3f));
         }
         if (((theInstr >> 21) & 1) == 0)
            return False;
         if (Q) {
            putQReg(dreg, binop(op, getQReg(mreg),
                     mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
         } else {
            putDRegI64(dreg, binop(op, getDRegI64(mreg),
                       mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
         }
         return True;
      default:
         return False;
   }
   return False;
}
6677 /* A7.4.5 Two registers, miscellaneous */
6678 static
6679 Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
6681 UInt A = (theInstr >> 16) & 3;
6682 UInt B = (theInstr >> 6) & 0x1f;
6683 UInt Q = (theInstr >> 6) & 1;
6684 UInt U = (theInstr >> 24) & 1;
6685 UInt size = (theInstr >> 18) & 3;
6686 UInt dreg = get_neon_d_regno(theInstr);
6687 UInt mreg = get_neon_m_regno(theInstr);
6688 UInt F = (theInstr >> 10) & 1;
6689 IRTemp arg_d = IRTemp_INVALID;
6690 IRTemp arg_m = IRTemp_INVALID;
6691 IRTemp res = IRTemp_INVALID;
6692 switch (A) {
6693 case 0:
6694 if (Q) {
6695 arg_m = newTemp(Ity_V128);
6696 res = newTemp(Ity_V128);
6697 assign(arg_m, getQReg(mreg));
6698 } else {
6699 arg_m = newTemp(Ity_I64);
6700 res = newTemp(Ity_I64);
6701 assign(arg_m, getDRegI64(mreg));
6703 switch (B >> 1) {
6704 case 0: {
6705 /* VREV64 */
6706 IROp op;
6707 switch (size) {
6708 case 0:
6709 op = Q ? Iop_Reverse8sIn64_x2 : Iop_Reverse8sIn64_x1;
6710 break;
6711 case 1:
6712 op = Q ? Iop_Reverse16sIn64_x2 : Iop_Reverse16sIn64_x1;
6713 break;
6714 case 2:
6715 op = Q ? Iop_Reverse32sIn64_x2 : Iop_Reverse32sIn64_x1;
6716 break;
6717 case 3:
6718 return False;
6719 default:
6720 vassert(0);
6722 assign(res, unop(op, mkexpr(arg_m)));
6723 DIP("vrev64.%d %c%u, %c%u\n", 8 << size,
6724 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6725 break;
6727 case 1: {
6728 /* VREV32 */
6729 IROp op;
6730 switch (size) {
6731 case 0:
6732 op = Q ? Iop_Reverse8sIn32_x4 : Iop_Reverse8sIn32_x2;
6733 break;
6734 case 1:
6735 op = Q ? Iop_Reverse16sIn32_x4 : Iop_Reverse16sIn32_x2;
6736 break;
6737 case 2:
6738 case 3:
6739 return False;
6740 default:
6741 vassert(0);
6743 assign(res, unop(op, mkexpr(arg_m)));
6744 DIP("vrev32.%d %c%u, %c%u\n", 8 << size,
6745 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6746 break;
6748 case 2: {
6749 /* VREV16 */
6750 IROp op;
6751 switch (size) {
6752 case 0:
6753 op = Q ? Iop_Reverse8sIn16_x8 : Iop_Reverse8sIn16_x4;
6754 break;
6755 case 1:
6756 case 2:
6757 case 3:
6758 return False;
6759 default:
6760 vassert(0);
6762 assign(res, unop(op, mkexpr(arg_m)));
6763 DIP("vrev16.%d %c%u, %c%u\n", 8 << size,
6764 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6765 break;
6767 case 3:
6768 return False;
6769 case 4:
6770 case 5: {
6771 /* VPADDL */
6772 IROp op;
6773 U = (theInstr >> 7) & 1;
6774 if (Q) {
6775 switch (size) {
6776 case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
6777 case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
6778 case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
6779 case 3: return False;
6780 default: vassert(0);
6782 } else {
6783 switch (size) {
6784 case 0: op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8; break;
6785 case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
6786 case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
6787 case 3: return False;
6788 default: vassert(0);
6791 assign(res, unop(op, mkexpr(arg_m)));
6792 DIP("vpaddl.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6793 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6794 break;
6796 case 6:
6797 case 7:
6798 return False;
6799 case 8: {
6800 /* VCLS */
6801 IROp op;
6802 switch (size) {
6803 case 0: op = Q ? Iop_Cls8x16 : Iop_Cls8x8; break;
6804 case 1: op = Q ? Iop_Cls16x8 : Iop_Cls16x4; break;
6805 case 2: op = Q ? Iop_Cls32x4 : Iop_Cls32x2; break;
6806 case 3: return False;
6807 default: vassert(0);
6809 assign(res, unop(op, mkexpr(arg_m)));
6810 DIP("vcls.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6811 Q ? 'q' : 'd', mreg);
6812 break;
6814 case 9: {
6815 /* VCLZ */
6816 IROp op;
6817 switch (size) {
6818 case 0: op = Q ? Iop_Clz8x16 : Iop_Clz8x8; break;
6819 case 1: op = Q ? Iop_Clz16x8 : Iop_Clz16x4; break;
6820 case 2: op = Q ? Iop_Clz32x4 : Iop_Clz32x2; break;
6821 case 3: return False;
6822 default: vassert(0);
6824 assign(res, unop(op, mkexpr(arg_m)));
6825 DIP("vclz.i%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6826 Q ? 'q' : 'd', mreg);
6827 break;
6829 case 10:
6830 /* VCNT */
6831 assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
6832 DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6833 mreg);
6834 break;
6835 case 11:
6836 /* VMVN */
6837 if (Q)
6838 assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
6839 else
6840 assign(res, unop(Iop_Not64, mkexpr(arg_m)));
6841 DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6842 mreg);
6843 break;
6844 case 12:
6845 case 13: {
6846 /* VPADAL */
6847 IROp op, add_op;
6848 U = (theInstr >> 7) & 1;
6849 if (Q) {
6850 switch (size) {
6851 case 0:
6852 op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
6853 add_op = Iop_Add16x8;
6854 break;
6855 case 1:
6856 op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
6857 add_op = Iop_Add32x4;
6858 break;
6859 case 2:
6860 op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
6861 add_op = Iop_Add64x2;
6862 break;
6863 case 3:
6864 return False;
6865 default:
6866 vassert(0);
6868 } else {
6869 switch (size) {
6870 case 0:
6871 op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
6872 add_op = Iop_Add16x4;
6873 break;
6874 case 1:
6875 op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
6876 add_op = Iop_Add32x2;
6877 break;
6878 case 2:
6879 op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
6880 add_op = Iop_Add64;
6881 break;
6882 case 3:
6883 return False;
6884 default:
6885 vassert(0);
6888 if (Q) {
6889 arg_d = newTemp(Ity_V128);
6890 assign(arg_d, getQReg(dreg));
6891 } else {
6892 arg_d = newTemp(Ity_I64);
6893 assign(arg_d, getDRegI64(dreg));
6895 assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
6896 mkexpr(arg_d)));
6897 DIP("vpadal.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6898 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6899 break;
6901 case 14: {
6902 /* VQABS */
6903 IROp op_sub, op_qsub, op_cmp;
6904 IRTemp mask, tmp;
6905 IRExpr *zero1, *zero2;
6906 IRExpr *neg, *neg2;
6907 if (Q) {
6908 zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6909 zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6910 mask = newTemp(Ity_V128);
6911 tmp = newTemp(Ity_V128);
6912 } else {
6913 zero1 = mkU64(0);
6914 zero2 = mkU64(0);
6915 mask = newTemp(Ity_I64);
6916 tmp = newTemp(Ity_I64);
6918 switch (size) {
6919 case 0:
6920 op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6921 op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6922 op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
6923 break;
6924 case 1:
6925 op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6926 op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6927 op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
6928 break;
6929 case 2:
6930 op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6931 op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6932 op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
6933 break;
6934 case 3:
6935 return False;
6936 default:
6937 vassert(0);
6939 assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
6940 neg = binop(op_qsub, zero2, mkexpr(arg_m));
6941 neg2 = binop(op_sub, zero2, mkexpr(arg_m));
6942 assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
6943 binop(Q ? Iop_AndV128 : Iop_And64,
6944 mkexpr(mask),
6945 mkexpr(arg_m)),
6946 binop(Q ? Iop_AndV128 : Iop_And64,
6947 unop(Q ? Iop_NotV128 : Iop_Not64,
6948 mkexpr(mask)),
6949 neg)));
6950 assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
6951 binop(Q ? Iop_AndV128 : Iop_And64,
6952 mkexpr(mask),
6953 mkexpr(arg_m)),
6954 binop(Q ? Iop_AndV128 : Iop_And64,
6955 unop(Q ? Iop_NotV128 : Iop_Not64,
6956 mkexpr(mask)),
6957 neg2)));
6958 setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
6959 DIP("vqabs.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6960 Q ? 'q' : 'd', mreg);
6961 break;
6963 case 15: {
6964 /* VQNEG */
6965 IROp op, op2;
6966 IRExpr *zero;
6967 if (Q) {
6968 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6969 } else {
6970 zero = mkU64(0);
6972 switch (size) {
6973 case 0:
6974 op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6975 op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6976 break;
6977 case 1:
6978 op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6979 op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6980 break;
6981 case 2:
6982 op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6983 op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6984 break;
6985 case 3:
6986 return False;
6987 default:
6988 vassert(0);
6990 assign(res, binop(op, zero, mkexpr(arg_m)));
6991 setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
6992 Q, condT);
6993 DIP("vqneg.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6994 Q ? 'q' : 'd', mreg);
6995 break;
6997 default:
6998 vassert(0);
7000 if (Q) {
7001 putQReg(dreg, mkexpr(res), condT);
7002 } else {
7003 putDRegI64(dreg, mkexpr(res), condT);
7005 return True;
7006 case 1:
7007 if (Q) {
7008 arg_m = newTemp(Ity_V128);
7009 res = newTemp(Ity_V128);
7010 assign(arg_m, getQReg(mreg));
7011 } else {
7012 arg_m = newTemp(Ity_I64);
7013 res = newTemp(Ity_I64);
7014 assign(arg_m, getDRegI64(mreg));
7016 switch ((B >> 1) & 0x7) {
7017 case 0: {
7018 /* VCGT #0 */
7019 IRExpr *zero;
7020 IROp op;
7021 if (Q) {
7022 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7023 } else {
7024 zero = mkU64(0);
7026 if (F) {
7027 switch (size) {
7028 case 0: case 1: case 3: return False;
7029 case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7030 default: vassert(0);
7032 } else {
7033 switch (size) {
7034 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7035 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7036 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7037 case 3: return False;
7038 default: vassert(0);
7041 assign(res, binop(op, mkexpr(arg_m), zero));
7042 DIP("vcgt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7043 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7044 break;
7046 case 1: {
7047 /* VCGE #0 */
7048 IROp op;
7049 IRExpr *zero;
7050 if (Q) {
7051 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7052 } else {
7053 zero = mkU64(0);
7055 if (F) {
7056 switch (size) {
7057 case 0: case 1: case 3: return False;
7058 case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7059 default: vassert(0);
7061 assign(res, binop(op, mkexpr(arg_m), zero));
7062 } else {
7063 switch (size) {
7064 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7065 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7066 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7067 case 3: return False;
7068 default: vassert(0);
7070 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7071 binop(op, zero, mkexpr(arg_m))));
7073 DIP("vcge.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7074 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7075 break;
7077 case 2: {
7078 /* VCEQ #0 */
7079 IROp op;
7080 IRExpr *zero;
7081 if (F) {
7082 if (Q) {
7083 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7084 } else {
7085 zero = mkU64(0);
7087 switch (size) {
7088 case 0: case 1: case 3: return False;
7089 case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
7090 default: vassert(0);
7092 assign(res, binop(op, zero, mkexpr(arg_m)));
7093 } else {
7094 switch (size) {
7095 case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
7096 case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
7097 case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
7098 case 3: return False;
7099 default: vassert(0);
7101 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7102 unop(op, mkexpr(arg_m))));
7104 DIP("vceq.%c%d %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
7105 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7106 break;
7108 case 3: {
7109 /* VCLE #0 */
7110 IRExpr *zero;
7111 IROp op;
7112 if (Q) {
7113 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7114 } else {
7115 zero = mkU64(0);
7117 if (F) {
7118 switch (size) {
7119 case 0: case 1: case 3: return False;
7120 case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7121 default: vassert(0);
7123 assign(res, binop(op, zero, mkexpr(arg_m)));
7124 } else {
7125 switch (size) {
7126 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7127 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7128 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7129 case 3: return False;
7130 default: vassert(0);
7132 assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7133 binop(op, mkexpr(arg_m), zero)));
7135 DIP("vcle.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7136 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7137 break;
7139 case 4: {
7140 /* VCLT #0 */
7141 IROp op;
7142 IRExpr *zero;
7143 if (Q) {
7144 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7145 } else {
7146 zero = mkU64(0);
7148 if (F) {
7149 switch (size) {
7150 case 0: case 1: case 3: return False;
7151 case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7152 default: vassert(0);
7154 assign(res, binop(op, zero, mkexpr(arg_m)));
7155 } else {
7156 switch (size) {
7157 case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7158 case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7159 case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7160 case 3: return False;
7161 default: vassert(0);
7163 assign(res, binop(op, zero, mkexpr(arg_m)));
7165 DIP("vclt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7166 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7167 break;
7169 case 5:
7170 return False;
7171 case 6: {
7172 /* VABS */
7173 if (!F) {
7174 IROp op;
7175 switch(size) {
7176 case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
7177 case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
7178 case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
7179 case 3: return False;
7180 default: vassert(0);
7182 assign(res, unop(op, mkexpr(arg_m)));
7183 } else {
7184 assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
7185 mkexpr(arg_m)));
7187 DIP("vabs.%c%d %c%u, %c%u\n",
7188 F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7189 Q ? 'q' : 'd', mreg);
7190 break;
7192 case 7: {
7193 /* VNEG */
7194 IROp op;
7195 IRExpr *zero;
7196 if (F) {
7197 switch (size) {
7198 case 0: case 1: case 3: return False;
7199 case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
7200 default: vassert(0);
7202 assign(res, unop(op, mkexpr(arg_m)));
7203 } else {
7204 if (Q) {
7205 zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7206 } else {
7207 zero = mkU64(0);
7209 switch (size) {
7210 case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
7211 case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
7212 case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
7213 case 3: return False;
7214 default: vassert(0);
7216 assign(res, binop(op, zero, mkexpr(arg_m)));
7218 DIP("vneg.%c%d %c%u, %c%u\n",
7219 F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7220 Q ? 'q' : 'd', mreg);
7221 break;
7223 default:
7224 vassert(0);
7226 if (Q) {
7227 putQReg(dreg, mkexpr(res), condT);
7228 } else {
7229 putDRegI64(dreg, mkexpr(res), condT);
7231 return True;
7232 case 2:
7233 if ((B >> 1) == 0) {
7234 /* VSWP */
7235 if (Q) {
7236 arg_m = newTemp(Ity_V128);
7237 assign(arg_m, getQReg(mreg));
7238 putQReg(mreg, getQReg(dreg), condT);
7239 putQReg(dreg, mkexpr(arg_m), condT);
7240 } else {
7241 arg_m = newTemp(Ity_I64);
7242 assign(arg_m, getDRegI64(mreg));
7243 putDRegI64(mreg, getDRegI64(dreg), condT);
7244 putDRegI64(dreg, mkexpr(arg_m), condT);
7246 DIP("vswp %c%u, %c%u\n",
7247 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7248 return True;
7249 } else if ((B >> 1) == 1) {
7250 /* VTRN */
7251 IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
7252 IRTemp old_m, old_d, new_d, new_m;
7253 if (Q) {
7254 old_m = newTemp(Ity_V128);
7255 old_d = newTemp(Ity_V128);
7256 new_m = newTemp(Ity_V128);
7257 new_d = newTemp(Ity_V128);
7258 assign(old_m, getQReg(mreg));
7259 assign(old_d, getQReg(dreg));
7260 } else {
7261 old_m = newTemp(Ity_I64);
7262 old_d = newTemp(Ity_I64);
7263 new_m = newTemp(Ity_I64);
7264 new_d = newTemp(Ity_I64);
7265 assign(old_m, getDRegI64(mreg));
7266 assign(old_d, getDRegI64(dreg));
7268 if (Q) {
7269 switch (size) {
7270 case 0:
7271 op_odd = Iop_InterleaveOddLanes8x16;
7272 op_even = Iop_InterleaveEvenLanes8x16;
7273 break;
7274 case 1:
7275 op_odd = Iop_InterleaveOddLanes16x8;
7276 op_even = Iop_InterleaveEvenLanes16x8;
7277 break;
7278 case 2:
7279 op_odd = Iop_InterleaveOddLanes32x4;
7280 op_even = Iop_InterleaveEvenLanes32x4;
7281 break;
7282 case 3:
7283 return False;
7284 default:
7285 vassert(0);
7287 } else {
7288 switch (size) {
7289 case 0:
7290 op_odd = Iop_InterleaveOddLanes8x8;
7291 op_even = Iop_InterleaveEvenLanes8x8;
7292 break;
7293 case 1:
7294 op_odd = Iop_InterleaveOddLanes16x4;
7295 op_even = Iop_InterleaveEvenLanes16x4;
7296 break;
7297 case 2:
7298 op_odd = Iop_InterleaveHI32x2;
7299 op_even = Iop_InterleaveLO32x2;
7300 break;
7301 case 3:
7302 return False;
7303 default:
7304 vassert(0);
7307 assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7308 assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7309 if (Q) {
7310 putQReg(dreg, mkexpr(new_d), condT);
7311 putQReg(mreg, mkexpr(new_m), condT);
7312 } else {
7313 putDRegI64(dreg, mkexpr(new_d), condT);
7314 putDRegI64(mreg, mkexpr(new_m), condT);
7316 DIP("vtrn.%d %c%u, %c%u\n",
7317 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7318 return True;
7319 } else if ((B >> 1) == 2) {
7320 /* VUZP */
7321 IROp op_even, op_odd;
7322 IRTemp old_m, old_d, new_m, new_d;
7323 if (!Q && size == 2)
7324 return False;
7325 if (Q) {
7326 old_m = newTemp(Ity_V128);
7327 old_d = newTemp(Ity_V128);
7328 new_m = newTemp(Ity_V128);
7329 new_d = newTemp(Ity_V128);
7330 assign(old_m, getQReg(mreg));
7331 assign(old_d, getQReg(dreg));
7332 } else {
7333 old_m = newTemp(Ity_I64);
7334 old_d = newTemp(Ity_I64);
7335 new_m = newTemp(Ity_I64);
7336 new_d = newTemp(Ity_I64);
7337 assign(old_m, getDRegI64(mreg));
7338 assign(old_d, getDRegI64(dreg));
7340 switch (size) {
7341 case 0:
7342 op_odd = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
7343 op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
7344 break;
7345 case 1:
7346 op_odd = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
7347 op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
7348 break;
7349 case 2:
7350 op_odd = Iop_CatOddLanes32x4;
7351 op_even = Iop_CatEvenLanes32x4;
7352 break;
7353 case 3:
7354 return False;
7355 default:
7356 vassert(0);
7358 assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7359 assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7360 if (Q) {
7361 putQReg(dreg, mkexpr(new_d), condT);
7362 putQReg(mreg, mkexpr(new_m), condT);
7363 } else {
7364 putDRegI64(dreg, mkexpr(new_d), condT);
7365 putDRegI64(mreg, mkexpr(new_m), condT);
7367 DIP("vuzp.%d %c%u, %c%u\n",
7368 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7369 return True;
7370 } else if ((B >> 1) == 3) {
7371 /* VZIP */
7372 IROp op_lo, op_hi;
7373 IRTemp old_m, old_d, new_m, new_d;
7374 if (!Q && size == 2)
7375 return False;
7376 if (Q) {
7377 old_m = newTemp(Ity_V128);
7378 old_d = newTemp(Ity_V128);
7379 new_m = newTemp(Ity_V128);
7380 new_d = newTemp(Ity_V128);
7381 assign(old_m, getQReg(mreg));
7382 assign(old_d, getQReg(dreg));
7383 } else {
7384 old_m = newTemp(Ity_I64);
7385 old_d = newTemp(Ity_I64);
7386 new_m = newTemp(Ity_I64);
7387 new_d = newTemp(Ity_I64);
7388 assign(old_m, getDRegI64(mreg));
7389 assign(old_d, getDRegI64(dreg));
7391 switch (size) {
7392 case 0:
7393 op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
7394 op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
7395 break;
7396 case 1:
7397 op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
7398 op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
7399 break;
7400 case 2:
7401 op_hi = Iop_InterleaveHI32x4;
7402 op_lo = Iop_InterleaveLO32x4;
7403 break;
7404 case 3:
7405 return False;
7406 default:
7407 vassert(0);
7409 assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
7410 assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
7411 if (Q) {
7412 putQReg(dreg, mkexpr(new_d), condT);
7413 putQReg(mreg, mkexpr(new_m), condT);
7414 } else {
7415 putDRegI64(dreg, mkexpr(new_d), condT);
7416 putDRegI64(mreg, mkexpr(new_m), condT);
7418 DIP("vzip.%d %c%u, %c%u\n",
7419 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7420 return True;
7421 } else if (B == 8) {
7422 /* VMOVN */
7423 IROp op;
7424 mreg >>= 1;
7425 switch (size) {
7426 case 0: op = Iop_NarrowUn16to8x8; break;
7427 case 1: op = Iop_NarrowUn32to16x4; break;
7428 case 2: op = Iop_NarrowUn64to32x2; break;
7429 case 3: return False;
7430 default: vassert(0);
7432 putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
7433 DIP("vmovn.i%d d%u, q%u\n", 16 << size, dreg, mreg);
7434 return True;
7435 } else if (B == 9 || (B >> 1) == 5) {
7436 /* VQMOVN, VQMOVUN */
7437 IROp op, op2;
7438 IRTemp tmp;
7439 dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
7440 mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
7441 if (mreg & 1)
7442 return False;
7443 mreg >>= 1;
7444 switch (size) {
7445 case 0: op2 = Iop_NarrowUn16to8x8; break;
7446 case 1: op2 = Iop_NarrowUn32to16x4; break;
7447 case 2: op2 = Iop_NarrowUn64to32x2; break;
7448 case 3: return False;
7449 default: vassert(0);
7451 switch (B & 3) {
7452 case 0:
7453 vassert(0);
7454 case 1:
7455 switch (size) {
7456 case 0: op = Iop_QNarrowUn16Sto8Ux8; break;
7457 case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
7458 case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
7459 case 3: return False;
7460 default: vassert(0);
7462 DIP("vqmovun.s%d d%u, q%u\n", 16 << size, dreg, mreg);
7463 break;
7464 case 2:
7465 switch (size) {
7466 case 0: op = Iop_QNarrowUn16Sto8Sx8; break;
7467 case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
7468 case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
7469 case 3: return False;
7470 default: vassert(0);
7472 DIP("vqmovn.s%d d%u, q%u\n", 16 << size, dreg, mreg);
7473 break;
7474 case 3:
7475 switch (size) {
7476 case 0: op = Iop_QNarrowUn16Uto8Ux8; break;
7477 case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
7478 case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
7479 case 3: return False;
7480 default: vassert(0);
7482 DIP("vqmovn.u%d d%u, q%u\n", 16 << size, dreg, mreg);
7483 break;
7484 default:
7485 vassert(0);
7487 res = newTemp(Ity_I64);
7488 tmp = newTemp(Ity_I64);
7489 assign(res, unop(op, getQReg(mreg)));
7490 assign(tmp, unop(op2, getQReg(mreg)));
7491 setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
7492 putDRegI64(dreg, mkexpr(res), condT);
7493 return True;
7494 } else if (B == 12) {
7495 /* VSHLL (maximum shift) */
7496 IROp op, cvt;
7497 UInt shift_imm;
7498 if (Q)
7499 return False;
7500 if (dreg & 1)
7501 return False;
7502 dreg >>= 1;
7503 shift_imm = 8 << size;
7504 res = newTemp(Ity_V128);
7505 switch (size) {
7506 case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8; break;
7507 case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
7508 case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
7509 case 3: return False;
7510 default: vassert(0);
7512 assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
7513 mkU8(shift_imm)));
7514 putQReg(dreg, mkexpr(res), condT);
7515 DIP("vshll.i%d q%u, d%u, #%d\n", 8 << size, dreg, mreg, 8 << size);
7516 return True;
7517 } else if ((B >> 3) == 3 && (B & 3) == 0) {
7518 /* VCVT (half<->single) */
7519 /* Half-precision extensions are needed to run this */
7520 vassert(0); // ATC
7521 if (((theInstr >> 18) & 3) != 1)
7522 return False;
7523 if ((theInstr >> 8) & 1) {
7524 if (dreg & 1)
7525 return False;
7526 dreg >>= 1;
7527 putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
7528 condT);
7529 DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
7530 } else {
7531 if (mreg & 1)
7532 return False;
7533 mreg >>= 1;
7534 putDRegI64(dreg, unop(Iop_F32toF16x4_DEP, getQReg(mreg)),
7535 condT);
7536 DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
7538 return True;
7539 } else {
7540 return False;
7542 vassert(0);
7543 return True;
7544 case 3:
7545 if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
7546 /* VRECPE */
7547 IROp op;
7548 F = (theInstr >> 8) & 1;
7549 if (size != 2)
7550 return False;
7551 if (Q) {
7552 op = F ? Iop_RecipEst32Fx4 : Iop_RecipEst32Ux4;
7553 putQReg(dreg, unop(op, getQReg(mreg)), condT);
7554 DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7555 } else {
7556 op = F ? Iop_RecipEst32Fx2 : Iop_RecipEst32Ux2;
7557 putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7558 DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7560 return True;
7561 } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
7562 /* VRSQRTE */
7563 IROp op;
7564 F = (B >> 2) & 1;
7565 if (size != 2)
7566 return False;
7567 if (F) {
7568 /* fp */
7569 op = Q ? Iop_RSqrtEst32Fx4 : Iop_RSqrtEst32Fx2;
7570 } else {
7571 /* unsigned int */
7572 op = Q ? Iop_RSqrtEst32Ux4 : Iop_RSqrtEst32Ux2;
7574 if (Q) {
7575 putQReg(dreg, unop(op, getQReg(mreg)), condT);
7576 DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7577 } else {
7578 putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7579 DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7581 return True;
7582 } else if ((B >> 3) == 3) {
7583 /* VCVT (fp<->integer) */
7584 IROp op;
7585 if (size != 2)
7586 return False;
7587 switch ((B >> 1) & 3) {
7588 case 0:
7589 op = Q ? Iop_I32StoF32x4_DEP : Iop_I32StoF32x2_DEP;
7590 DIP("vcvt.f32.s32 %c%u, %c%u\n",
7591 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7592 break;
7593 case 1:
7594 op = Q ? Iop_I32UtoF32x4_DEP : Iop_I32UtoF32x2_DEP;
7595 DIP("vcvt.f32.u32 %c%u, %c%u\n",
7596 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7597 break;
7598 case 2:
7599 op = Q ? Iop_F32toI32Sx4_RZ : Iop_F32toI32Sx2_RZ;
7600 DIP("vcvt.s32.f32 %c%u, %c%u\n",
7601 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7602 break;
7603 case 3:
7604 op = Q ? Iop_F32toI32Ux4_RZ : Iop_F32toI32Ux2_RZ;
7605 DIP("vcvt.u32.f32 %c%u, %c%u\n",
7606 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7607 break;
7608 default:
7609 vassert(0);
7611 if (Q) {
7612 putQReg(dreg, unop(op, getQReg(mreg)), condT);
7613 } else {
7614 putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7616 return True;
7617 } else {
7618 return False;
7620 vassert(0);
7621 return True;
7622 default:
7623 vassert(0);
7625 return False;
7628 /* A7.4.6 One register and a modified immediate value */
7629 static
7630 void ppNeonImm(UInt imm, UInt cmode, UInt op)
7632 int i;
7633 switch (cmode) {
7634 case 0: case 1: case 8: case 9:
7635 vex_printf("0x%x", imm);
7636 break;
7637 case 2: case 3: case 10: case 11:
7638 vex_printf("0x%x00", imm);
7639 break;
7640 case 4: case 5:
7641 vex_printf("0x%x0000", imm);
7642 break;
7643 case 6: case 7:
7644 vex_printf("0x%x000000", imm);
7645 break;
7646 case 12:
7647 vex_printf("0x%xff", imm);
7648 break;
7649 case 13:
7650 vex_printf("0x%xffff", imm);
7651 break;
7652 case 14:
7653 if (op) {
7654 vex_printf("0x");
7655 for (i = 7; i >= 0; i--)
7656 vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
7657 } else {
7658 vex_printf("0x%x", imm);
7660 break;
7661 case 15:
7662 vex_printf("0x%x", imm);
7663 break;
7667 static
7668 const char *ppNeonImmType(UInt cmode, UInt op)
7670 switch (cmode) {
7671 case 0 ... 7:
7672 case 12: case 13:
7673 return "i32";
7674 case 8 ... 11:
7675 return "i16";
7676 case 14:
7677 if (op)
7678 return "i64";
7679 else
7680 return "i8";
7681 case 15:
7682 if (op)
7683 vassert(0);
7684 else
7685 return "f32";
7686 default:
7687 vassert(0);
7691 static
7692 void DIPimm(UInt imm, UInt cmode, UInt op,
7693 const char *instr, UInt Q, UInt dreg)
7695 if (vex_traceflags & VEX_TRACE_FE) {
7696 vex_printf("%s.%s %c%u, #", instr,
7697 ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
7698 ppNeonImm(imm, cmode, op);
7699 vex_printf("\n");
/* Translate a NEON "one register and a modified immediate value"
   instruction (ARM ARM A7.4.6): VMOV/VMVN/VORR/VBIC (immediate).
   Returns True if the instruction was decoded and IR emitted,
   False if it is not a valid encoding.  condT is the guarding
   condition temp (IRTemp_INVALID-style handling is done by the
   put* helpers). */
static
Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
{
   UInt dreg = get_neon_d_regno(theInstr);
   /* Gather the scattered abcdefgh immediate byte:
      bit 24 -> bit 7, bits 18:16 -> bits 6:4, bits 3:0 -> bits 3:0. */
   ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
                   (theInstr & 0xf);
   ULong imm_raw_pp = imm_raw;   /* unexpanded copy, kept for printing */
   UInt cmode = (theInstr >> 8) & 0xf;
   UInt op_bit = (theInstr >> 5) & 1;
   ULong imm = 0;                /* fully expanded 64-bit immediate */
   UInt Q = (theInstr >> 6) & 1; /* 1 -> 128-bit Q reg, 0 -> 64-bit D reg */
   int i, j;
   UInt tmp;
   IRExpr *imm_val;
   IRExpr *expr;
   IRTemp tmp_var;
   /* Expand imm_raw to a replicated 64-bit pattern per cmode.
      The fallthroughs accumulate one byte-shift per level, so e.g.
      cmode 6/7 shifts by 24 bits in total before replication. */
   switch(cmode) {
      case 7: case 6:
         imm_raw = imm_raw << 8;
         /* fallthrough */
      case 5: case 4:
         imm_raw = imm_raw << 8;
         /* fallthrough */
      case 3: case 2:
         imm_raw = imm_raw << 8;
         /* fallthrough */
      case 0: case 1:
         /* 32-bit element, replicated into both halves. */
         imm = (imm_raw << 32) | imm_raw;
         break;
      case 11: case 10:
         imm_raw = imm_raw << 8;
         /* fallthrough */
      case 9: case 8:
         /* 16-bit element: replicate to 32, then to 64 bits. */
         imm_raw = (imm_raw << 16) | imm_raw;
         imm = (imm_raw << 32) | imm_raw;
         break;
      case 13:
         /* Byte shifted up with ones filled in below ("0xXXffff"
          after the second shift). */
         imm_raw = (imm_raw << 8) | 0xff;
         /* fallthrough */
      case 12:
         imm_raw = (imm_raw << 8) | 0xff;
         imm = (imm_raw << 32) | imm_raw;
         break;
      case 14:
         if (! op_bit) {
            /* 8-bit element: replicate the byte into all 8 lanes. */
            for(i = 0; i < 8; i++) {
               imm = (imm << 8) | imm_raw;
            }
         } else {
            /* Each bit i of imm_raw becomes a whole byte (0x00 or
               0xff) of the 64-bit result. */
            for(i = 7; i >= 0; i--) {
               tmp = 0;
               for(j = 0; j < 8; j++) {
                  tmp = (tmp << 1) | ((imm_raw >> i) & 1);
               }
               imm = (imm << 8) | tmp;
            }
         }
         break;
      case 15:
         /* VFP-style expand-to-F32: sign from bit 7, exponent built
            from (inverted) bit 6 replicated, 4-bit fraction from the
            low bits; final << 19 places it in IEEE754 single layout.
            Replicated into both 32-bit halves. */
         imm = (imm_raw & 0x80) << 5;
         imm |= ((~imm_raw & 0x40) << 5);
         for(i = 1; i <= 4; i++)
            imm |= (imm_raw & 0x40) << i;
         imm |= (imm_raw & 0x7f);
         imm = imm << 19;
         imm = (imm << 32) | imm;
         break;
      default:
         /* cmode is 4 bits, so this is unreachable; kept as a
            decode-failure for safety. */
         return False;
   }
   /* Widen the 64-bit pattern to 128 bits for Q-register forms. */
   if (Q) {
      imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
   } else {
      imm_val = mkU64(imm);
   }
   if (((op_bit == 0) &&
      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
      ((op_bit == 1) && (cmode == 14))) {
      /* VMOV (immediate) */
      if (Q) {
         putQReg(dreg, imm_val, condT);
      } else {
         putDRegI64(dreg, imm_val, condT);
      }
      DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
      return True;
   }
   if ((op_bit == 1) &&
      (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
      /* VMVN (immediate): write the bitwise complement. */
      if (Q) {
         putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
      } else {
         putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
      }
      DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
      return True;
   }
   /* Remaining forms (VORR/VBIC) read-modify-write dreg, so fetch
      its current value first. */
   if (Q) {
      tmp_var = newTemp(Ity_V128);
      assign(tmp_var, getQReg(dreg));
   } else {
      tmp_var = newTemp(Ity_I64);
      assign(tmp_var, getDRegI64(dreg));
   }
   if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
      /* VORR (immediate) */
      if (Q)
         expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
      else
         expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
      DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
   } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
      /* VBIC (immediate): AND with complement of the immediate. */
      if (Q)
         expr = binop(Iop_AndV128, mkexpr(tmp_var),
                      unop(Iop_NotV128, imm_val));
      else
         expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
      DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
   } else {
      return False;
   }
   if (Q)
      putQReg(dreg, expr, condT);
   else
      putDRegI64(dreg, expr, condT);
   return True;
}
7833 /* A7.4 Advanced SIMD data-processing instructions */
/* Top-level dispatcher for Advanced SIMD data-processing
   instructions (ARM ARM A7.4).  Decodes the A/B/C/U selector fields
   and hands the instruction to the appropriate group translator.
   Returns False for encodings no sub-decoder accepts. */
static
Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
{
   UInt A = (theInstr >> 19) & 0x1F;  /* instr[23:19] */
   UInt B = (theInstr >> 8) & 0xF;    /* instr[11:8]  */
   UInt C = (theInstr >> 4) & 0xF;    /* instr[7:4]   */
   UInt U = (theInstr >> 24) & 0x1;   /* instr[24]    */

   /* A[4] clear: three registers of the same length. */
   if (! (A & 0x10)) {
      return dis_neon_data_3same(theInstr, condT);
   }
   /* One register and a modified immediate value. */
   if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
      return dis_neon_data_1reg_and_imm(theInstr, condT);
   }
   /* Two registers and a shift amount. */
   if ((C & 1) == 1) {
      return dis_neon_data_2reg_and_shift(theInstr, condT);
   }
   /* Three registers of different lengths. */
   if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
      return dis_neon_data_3diff(theInstr, condT);
   }
   /* Two registers and a scalar. */
   if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
      return dis_neon_data_2reg_and_scalar(theInstr, condT);
   }
   /* Remaining space: VEXT, two-register misc, VTBL/VTBX, VDUP. */
   if ((A & 0x16) == 0x16) {
      if ((U == 0) && ((C & 1) == 0)) {
         return dis_neon_vext(theInstr, condT);
      }
      if ((U != 1) || ((C & 1) == 1))
         return False;
      if ((B & 8) == 0) {
         return dis_neon_data_2reg_misc(theInstr, condT);
      }
      if ((B & 12) == 8) {
         return dis_neon_vtb(theInstr, condT);
      }
      if ((B == 12) && ((C & 9) == 0)) {
         return dis_neon_vdup(theInstr, condT);
      }
   }
   return False;
}
7877 /*------------------------------------------------------------*/
7878 /*--- NEON loads and stores ---*/
7879 /*------------------------------------------------------------*/
7881 /* For NEON memory operations, we use the standard scheme to handle
7882 conditionalisation: generate a jump around the instruction if the
7883 condition is false. That's only necessary in Thumb mode, however,
7884 since in ARM mode NEON instructions are unconditional. */
7886 /* A helper function for what follows. It assumes we already went
7887 uncond as per comments at the top of this section. */
static
void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
                                    UInt N, UInt size, IRTemp addr )
{
   /* Emit IR to load a single element from 'addr' into lane 'index'
      of D register rD, and (for multi-register VLDn-to-one-lane
      forms) one further element into the same lane of each of the N
      following registers rD + i*inc, from consecutive memory
      locations.  'size' selects the element width: 0 -> 8 bits,
      1 -> 16 bits, 2 -> 32 bits.  inc is the register stride
      (presumably 1 or 2 per the VLDn encodings — set by caller).
      Assumes we have already gone unconditional, hence the
      IRTemp_INVALID guards on the register writes. */
   UInt i;
   /* First register: load from addr itself (offset 0). */
   switch (size) {
      case 0:
         putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
                    loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
         break;
      case 1:
         putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
                    loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
         break;
      case 2:
         putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
                    loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
         break;
      default:
         vassert(0);
   }
   /* Remaining N registers: element i comes from addr + i*elem_size. */
   for (i = 1; i <= N; i++) {
      switch (size) {
         case 0:
            putDRegI64(rD + i * inc,
                       triop(Iop_SetElem8x8,
                             getDRegI64(rD + i * inc),
                             mkU8(index),
                             loadLE(Ity_I8, binop(Iop_Add32,
                                                  mkexpr(addr),
                                                  mkU32(i * 1)))),
                       IRTemp_INVALID);
            break;
         case 1:
            putDRegI64(rD + i * inc,
                       triop(Iop_SetElem16x4,
                             getDRegI64(rD + i * inc),
                             mkU8(index),
                             loadLE(Ity_I16, binop(Iop_Add32,
                                                   mkexpr(addr),
                                                   mkU32(i * 2)))),
                       IRTemp_INVALID);
            break;
         case 2:
            putDRegI64(rD + i * inc,
                       triop(Iop_SetElem32x2,
                             getDRegI64(rD + i * inc),
                             mkU8(index),
                             loadLE(Ity_I32, binop(Iop_Add32,
                                                   mkexpr(addr),
                                                   mkU32(i * 4)))),
                       IRTemp_INVALID);
            break;
         default:
            vassert(0);
      }
   }
}
7947 /* A(nother) helper function for what follows. It assumes we already
7948 went uncond as per comments at the top of this section. */
static
void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
                                       UInt N, UInt size, IRTemp addr )
{
   /* Mirror of mk_neon_elem_load_to_one_lane for stores: write lane
      'index' of D register rD to 'addr', and for multi-register
      VSTn-from-one-lane forms also store the same lane of each of
      the N following registers rD + i*inc to consecutive memory
      locations.  'size' selects element width: 0 -> 8, 1 -> 16,
      2 -> 32 bits.  Assumes we already went unconditional. */
   UInt i;
   /* First register: store at addr itself (offset 0). */
   switch (size) {
      case 0:
         storeLE(mkexpr(addr),
                 binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
         break;
      case 1:
         storeLE(mkexpr(addr),
                 binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
         break;
      case 2:
         storeLE(mkexpr(addr),
                 binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
         break;
      default:
         vassert(0);
   }
   /* Remaining N registers: element i goes to addr + i*elem_size. */
   for (i = 1; i <= N; i++) {
      switch (size) {
         case 0:
            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
                    binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
                                          mkU8(index)));
            break;
         case 1:
            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
                    binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
                                           mkU8(index)));
            break;
         case 2:
            storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
                    binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
                                           mkU8(index)));
            break;
         default:
            vassert(0);
      }
   }
}
7993 /* Generate 2x64 -> 2x64 deinterleave code, for VLD2. Caller must
7994 make *u0 and *u1 be valid IRTemps before the call. */
7995 static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
7996 IRTemp i0, IRTemp i1, Int laneszB)
7998 /* The following assumes that the guest is little endian, and hence
7999 that the memory-side (interleaved) data is stored
8000 little-endianly. */
8001 vassert(u0 && u1);
8002 /* This is pretty easy, since we have primitives directly to
8003 hand. */
8004 if (laneszB == 4) {
8005 // memLE(128 bits) == A0 B0 A1 B1
8006 // i0 == B0 A0, i1 == B1 A1
8007 // u0 == A1 A0, u1 == B1 B0
8008 assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
8009 assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
8010 } else if (laneszB == 2) {
8011 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
8012 // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
8013 // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
8014 assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
8015 assign(*u1, binop(Iop_CatOddLanes16x4, mkexpr(i1), mkexpr(i0)));
8016 } else if (laneszB == 1) {
8017 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
8018 // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
8019 // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
8020 assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
8021 assign(*u1, binop(Iop_CatOddLanes8x8, mkexpr(i1), mkexpr(i0)));
8022 } else {
8023 // Can never happen, since VLD2 only has valid lane widths of 32,
8024 // 16 or 8 bits.
8025 vpanic("math_DEINTERLEAVE_2");
8029 /* Generate 2x64 -> 2x64 interleave code, for VST2. Caller must make
8030 *u0 and *u1 be valid IRTemps before the call. */
8031 static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8032 IRTemp u0, IRTemp u1, Int laneszB)
8034 /* The following assumes that the guest is little endian, and hence
8035 that the memory-side (interleaved) data is stored
8036 little-endianly. */
8037 vassert(i0 && i1);
8038 /* This is pretty easy, since we have primitives directly to
8039 hand. */
8040 if (laneszB == 4) {
8041 // memLE(128 bits) == A0 B0 A1 B1
8042 // i0 == B0 A0, i1 == B1 A1
8043 // u0 == A1 A0, u1 == B1 B0
8044 assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
8045 assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
8046 } else if (laneszB == 2) {
8047 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
8048 // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
8049 // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
8050 assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
8051 assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
8052 } else if (laneszB == 1) {
8053 // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
8054 // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
8055 // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
8056 assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
8057 assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
8058 } else {
8059 // Can never happen, since VST2 only has valid lane widths of 32,
8060 // 16 or 8 bits.
8061 vpanic("math_INTERLEAVE_2");
8065 // Helper function for generating arbitrary slicing 'n' dicing of
8066 // 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
8067 static IRExpr* math_PERM_8x8x3(const UChar* desc,
8068 IRTemp s0, IRTemp s1, IRTemp s2)
8070 // desc is an array of 8 pairs, encoded as 16 bytes,
8071 // that describe how to assemble the result lanes, starting with
8072 // lane 7. Each pair is: first component (0..2) says which of
8073 // s0/s1/s2 to use. Second component (0..7) is the lane number
8074 // in the source to use.
8075 UInt si;
8076 for (si = 0; si < 7; si++) {
8077 vassert(desc[2 * si + 0] <= 2);
8078 vassert(desc[2 * si + 1] <= 7);
8080 IRTemp h3 = newTemp(Ity_I64);
8081 IRTemp h2 = newTemp(Ity_I64);
8082 IRTemp h1 = newTemp(Ity_I64);
8083 IRTemp h0 = newTemp(Ity_I64);
8084 IRTemp srcs[3] = {s0, s1, s2};
8085 # define SRC_VEC(_lane) mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
8086 # define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
8087 assign(h3, binop(Iop_InterleaveHI8x8,
8088 binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
8089 binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
8090 assign(h2, binop(Iop_InterleaveHI8x8,
8091 binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
8092 binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
8093 assign(h1, binop(Iop_InterleaveHI8x8,
8094 binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
8095 binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
8096 assign(h0, binop(Iop_InterleaveHI8x8,
8097 binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
8098 binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
8099 # undef SRC_VEC
8100 # undef SRC_SHIFT
8101 // Now h3..h0 are 64 bit vectors with useful information only
8102 // in the top 16 bits. We now concatentate those four 16-bit
8103 // groups so as to produce the final result.
8104 IRTemp w1 = newTemp(Ity_I64);
8105 IRTemp w0 = newTemp(Ity_I64);
8106 assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
8107 assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
8108 return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
/* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
   make *u0, *u1 and *u2 be valid IRTemps before the call.  i0..i2 are
   the three 64-bit memory-order (interleaved) values; on return
   *u0..*u2 have been assigned the register-order (deinterleaved)
   values.  laneszB is the lane width in bytes: 4, 2 or 1. */
static void math_DEINTERLEAVE_3 (
               /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
               IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
            )
{
#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   /* The following assumes that the guest is little endian, and hence
      that the memory-side (interleaved) data is stored
      little-endianly. */
   vassert(u0 && u1 && u2);
   if (laneszB == 4) {
      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
      assign(*u0, IHI32x2(SHL64(i1, 0), SHL64(i0, 32)));
      assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0, 0)));
      assign(*u2, IHI32x2(SHL64(i2, 0), SHL64(i1, 32)));
   } else if (laneszB == 2) {
      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
      // XXX picks 16-bit lane _laK of temp _tmpK (K = 3..0), shifts
      // each to the top of a 64-bit value, and packs the four top
      // lanes, high to low, into one 64-bit result.
#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
         IHI32x2( \
            IHI16x4(SHL64((_tmp3),48-16*(_la3)), \
                    SHL64((_tmp2),48-16*(_la2))), \
            IHI16x4(SHL64((_tmp1),48-16*(_la1)), \
                    SHL64((_tmp0),48-16*(_la0))))
      assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
      assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
      assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
#     undef XXX
   } else if (laneszB == 1) {
      // These describe how the result vectors [7..0] are
      // assembled from the source vectors.  Each pair is
      // (source vector number, lane number).
      static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
      static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
      static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
      assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
      assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
      assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
   } else {
      // Can never happen, since VLD3 only has valid lane widths of 32,
      // 16 or 8 bits.
      vpanic("math_DEINTERLEAVE_3");
   }
#  undef SHL64
#  undef IHI16x4
#  undef IHI32x2
}
/* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
   make *i0, *i1 and *i2 be valid IRTemps before the call.  u0..u2 are
   the three 64-bit register-order (uninterleaved) values; on return
   *i0..*i2 have been assigned the memory-order (interleaved) values.
   laneszB is the lane width in bytes: 4, 2 or 1.  This is the exact
   inverse of math_DEINTERLEAVE_3. */
static void math_INTERLEAVE_3 (
               /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
               IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
            )
{
#  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
#  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
#  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   /* The following assumes that the guest is little endian, and hence
      that the memory-side (interleaved) data is stored
      little-endianly. */
   vassert(i0 && i1 && i2);
   if (laneszB == 4) {
      // memLE(192 bits) == A0 B0 C0 A1 B1 C1
      // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
      // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
      assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
      assign(*i1, IHI32x2(SHL64(u0, 0), SHL64(u2, 32)));
      assign(*i2, IHI32x2(SHL64(u2, 0), SHL64(u1, 0)));
   } else if (laneszB == 2) {
      // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
      // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
      // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
      // XXX picks 16-bit lane _laK of temp _tmpK (K = 3..0), shifts
      // each to the top of a 64-bit value, and packs the four top
      // lanes, high to low, into one 64-bit result.
#     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
         IHI32x2( \
            IHI16x4(SHL64((_tmp3),48-16*(_la3)), \
                    SHL64((_tmp2),48-16*(_la2))), \
            IHI16x4(SHL64((_tmp1),48-16*(_la1)), \
                    SHL64((_tmp0),48-16*(_la0))))
      assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
      assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
      assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
#     undef XXX
   } else if (laneszB == 1) {
      // These describe how the result vectors [7..0] are
      // assembled from the source vectors.  Each pair is
      // (source vector number, lane number).
      static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
      static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
      static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
      assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
      assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
      assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
   } else {
      // Can never happen, since VST3 only has valid lane widths of 32,
      // 16 or 8 bits.
      vpanic("math_INTERLEAVE_3");
   }
#  undef SHL64
#  undef IHI16x4
#  undef IHI32x2
}
/* Generate 4x64 -> 4x64 deinterleave code, for VLD4.  Caller must
   make *u0, *u1, *u2 and *u3 be valid IRTemps before the call.
   i0..i3 are the four 64-bit memory-order (interleaved) values; on
   return *u0..*u3 have been assigned the register-order
   (deinterleaved) values.  laneszB is the lane width in bytes:
   4, 2 or 1. */
static void math_DEINTERLEAVE_4 (
               /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
               /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
               IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
            )
{
#  define IHI32x2(_t1, _t2) \
      binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
#  define ILO32x2(_t1, _t2) \
      binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
#  define IHI16x4(_t1, _t2) \
      binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
#  define ILO16x4(_t1, _t2) \
      binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
#  define IHI8x8(_t1, _e2) \
      binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
#  define SHL64(_tmp, _amt) \
      binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
   /* The following assumes that the guest is little endian, and hence
      that the memory-side (interleaved) data is stored
      little-endianly. */
   vassert(u0 && u1 && u2 && u3);
   if (laneszB == 4) {
      // 32-bit lanes: one level of 32x2 interleaving suffices.
      assign(*u0, ILO32x2(i2, i0));
      assign(*u1, IHI32x2(i2, i0));
      assign(*u2, ILO32x2(i3, i1));
      assign(*u3, IHI32x2(i3, i1));
   } else if (laneszB == 2) {
      // 16-bit lanes: first deinterleave at 16 bits, then finish off
      // exactly as in the 32-bit case.
      IRTemp b1b0a1a0 = newTemp(Ity_I64);
      IRTemp b3b2a3a2 = newTemp(Ity_I64);
      IRTemp d1d0c1c0 = newTemp(Ity_I64);
      IRTemp d3d2c3c2 = newTemp(Ity_I64);
      assign(b1b0a1a0, ILO16x4(i1, i0));
      assign(b3b2a3a2, ILO16x4(i3, i2));
      assign(d1d0c1c0, IHI16x4(i1, i0));
      assign(d3d2c3c2, IHI16x4(i3, i2));
      // And now do what we did for the 32-bit case.
      assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
      assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
      assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
      assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
   } else if (laneszB == 1) {
      // Deinterleave into 16-bit chunks, then do as the 16-bit case.
      IRTemp i0x = newTemp(Ity_I64);
      IRTemp i1x = newTemp(Ity_I64);
      IRTemp i2x = newTemp(Ity_I64);
      IRTemp i3x = newTemp(Ity_I64);
      assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
      assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
      assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
      assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
      // From here on is like the 16 bit case.
      IRTemp b1b0a1a0 = newTemp(Ity_I64);
      IRTemp b3b2a3a2 = newTemp(Ity_I64);
      IRTemp d1d0c1c0 = newTemp(Ity_I64);
      IRTemp d3d2c3c2 = newTemp(Ity_I64);
      assign(b1b0a1a0, ILO16x4(i1x, i0x));
      assign(b3b2a3a2, ILO16x4(i3x, i2x));
      assign(d1d0c1c0, IHI16x4(i1x, i0x));
      assign(d3d2c3c2, IHI16x4(i3x, i2x));
      // And now do what we did for the 32-bit case.
      assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
      assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
      assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
      assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
   } else {
      // Can never happen, since VLD4 only has valid lane widths of 32,
      // 16 or 8 bits.
      vpanic("math_DEINTERLEAVE_4");
   }
#  undef SHL64
#  undef IHI8x8
#  undef ILO16x4
#  undef IHI16x4
#  undef ILO32x2
#  undef IHI32x2
}
8301 /* Generate 4x64 -> 4x64 interleave code, for VST4. Caller must
8302 make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
8303 static void math_INTERLEAVE_4 (
8304 /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8305 /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
8306 IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
8309 # define IHI32x2(_t1, _t2) \
8310 binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8311 # define ILO32x2(_t1, _t2) \
8312 binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8313 # define CEV16x4(_t1, _t2) \
8314 binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
8315 # define COD16x4(_t1, _t2) \
8316 binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
8317 # define COD8x8(_t1, _e2) \
8318 binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
8319 # define SHL64(_tmp, _amt) \
8320 binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8321 /* The following assumes that the guest is little endian, and hence
8322 that the memory-side (interleaved) data is stored
8323 little-endianly. */
8324 vassert(u0 && u1 && u2 && u3);
8325 if (laneszB == 4) {
8326 assign(*i0, ILO32x2(u1, u0));
8327 assign(*i1, ILO32x2(u3, u2));
8328 assign(*i2, IHI32x2(u1, u0));
8329 assign(*i3, IHI32x2(u3, u2));
8330 } else if (laneszB == 2) {
8331 // First, interleave at the 32-bit lane size.
8332 IRTemp b1b0a1a0 = newTemp(Ity_I64);
8333 IRTemp b3b2a3a2 = newTemp(Ity_I64);
8334 IRTemp d1d0c1c0 = newTemp(Ity_I64);
8335 IRTemp d3d2c3c2 = newTemp(Ity_I64);
8336 assign(b1b0a1a0, ILO32x2(u1, u0));
8337 assign(b3b2a3a2, IHI32x2(u1, u0));
8338 assign(d1d0c1c0, ILO32x2(u3, u2));
8339 assign(d3d2c3c2, IHI32x2(u3, u2));
8340 // And interleave (cat) at the 16 bit size.
8341 assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
8342 assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
8343 assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
8344 assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
8345 } else if (laneszB == 1) {
8346 // First, interleave at the 32-bit lane size.
8347 IRTemp b1b0a1a0 = newTemp(Ity_I64);
8348 IRTemp b3b2a3a2 = newTemp(Ity_I64);
8349 IRTemp d1d0c1c0 = newTemp(Ity_I64);
8350 IRTemp d3d2c3c2 = newTemp(Ity_I64);
8351 assign(b1b0a1a0, ILO32x2(u1, u0));
8352 assign(b3b2a3a2, IHI32x2(u1, u0));
8353 assign(d1d0c1c0, ILO32x2(u3, u2));
8354 assign(d3d2c3c2, IHI32x2(u3, u2));
8355 // And interleave (cat) at the 16 bit size.
8356 IRTemp i0x = newTemp(Ity_I64);
8357 IRTemp i1x = newTemp(Ity_I64);
8358 IRTemp i2x = newTemp(Ity_I64);
8359 IRTemp i3x = newTemp(Ity_I64);
8360 assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
8361 assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
8362 assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
8363 assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
8364 // And rearrange within each word, to get the right 8 bit lanes.
8365 assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
8366 assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
8367 assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
8368 assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
8369 } else {
8370 // Can never happen, since VLD4 only has valid lane widths of 32,
8371 // 16 or 8 bits.
8372 vpanic("math_DEINTERLEAVE_4");
8374 # undef SHL64
8375 # undef COD8x8
8376 # undef COD16x4
8377 # undef CEV16x4
8378 # undef ILO32x2
8379 # undef IHI32x2
/* A7.7 Advanced SIMD element or structure load/store instructions */
/* Decode and generate IR for one NEON element/structure load/store
   instruction (VLD1..VLD4 / VST1..VST4, in their one-lane,
   all-lanes and multiple-structure forms).  |theInstr| must be
   presented in the ARM encoding even for Thumb (the caller swizzles
   Thumb encodings into ARM form before calling here).  |isT| says
   whether we are in Thumb mode; |condT| holds the gating condition
   for Thumb and must be IRTemp_INVALID for ARM.  Returns True iff
   the instruction was recognised and IR was generated. */
static
Bool dis_neon_load_or_store ( UInt theInstr,
                              Bool isT, IRTemp condT )
{
#  define INSN(_bMax,_bMin) SLICE_UInt(theInstr, (_bMax), (_bMin))
   UInt bA = INSN(23,23);
   UInt fB = INSN(11,8);
   UInt bL = INSN(21,21);         // 1 = load (VLDn), 0 = store (VSTn)
   UInt rD = (INSN(22,22) << 4) | INSN(15,12);
   UInt rN = INSN(19,16);
   UInt rM = INSN(3,0);
   UInt N, size, i, j;
   UInt inc;
   UInt regs = 1;

   if (isT) {
      vassert(condT != IRTemp_INVALID);
   } else {
      vassert(condT == IRTemp_INVALID);
   }
   /* So now, if condT is not IRTemp_INVALID, we know we're
      dealing with Thumb code. */

   if (INSN(20,20) != 0)
      return False;

   /* Snapshot Rn and Rm now, before any guest-state writes, since
      both are needed later for the writeback computation. */
   IRTemp initialRn = newTemp(Ity_I32);
   assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));

   IRTemp initialRm = newTemp(Ity_I32);
   assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));

   /* There are 3 cases:
      (1) VSTn / VLDn (n-element structure from/to one lane)
      (2) VLDn (single element to all lanes)
      (3) VSTn / VLDn (multiple n-element structures)
   */
   if (bA) {
      N = fB & 3;
      if ((fB >> 2) < 3) {
         /* ------------ Case (1) ------------
            VSTn / VLDn (n-element structure from/to one lane) */

         size = fB >> 2;

         /* i = lane index within each D register, inc = register
            step between the N+1 registers of the structure. */
         switch (size) {
            case 0: i = INSN(7,5); inc = 1; break;
            case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
            case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
            case 3: return False;
            default: vassert(0);
         }

         IRTemp addr = newTemp(Ity_I32);
         assign(addr, mkexpr(initialRn));

         // go uncond
         if (condT != IRTemp_INVALID)
            mk_skip_over_T32_if_cond_is_false(condT);
         // now uncond

         if (bL)
            mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
         else
            mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
         DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << size);
         for (j = 0; j <= N; j++) {
            if (j)
               DIP(", ");
            DIP("d%u[%u]", rD + j * inc, i);
         }
         DIP("}, [r%u]", rN);
         if (rM != 13 && rM != 15) {
            DIP(", r%u\n", rM);
         } else {
            DIP("%s\n", (rM != 15) ? "!" : "");
         }
      } else {
         /* ------------ Case (2) ------------
            VLDn (single element to all lanes) */
         UInt r;
         if (bL == 0)
            return False;

         inc = INSN(5,5) + 1;
         size = INSN(7,6);

         /* size == 3 and size == 2 cases differ in alignment constraints */
         if (size == 3 && N == 3 && INSN(4,4) == 1)
            size = 2;

         if (size == 0 && N == 0 && INSN(4,4) == 1)
            return False;
         if (N == 2 && INSN(4,4) == 1)
            return False;
         if (size == 3)
            return False;

         // go uncond
         if (condT != IRTemp_INVALID)
            mk_skip_over_T32_if_cond_is_false(condT);
         // now uncond

         IRTemp addr = newTemp(Ity_I32);
         assign(addr, mkexpr(initialRn));

         if (N == 0 && INSN(5,5))
            regs = 2;

         /* For each register in the set, load one element and
            duplicate it across all lanes of the D register. */
         for (r = 0; r < regs; r++) {
            switch (size) {
               case 0:
                  putDRegI64(rD + r, unop(Iop_Dup8x8,
                                          loadLE(Ity_I8, mkexpr(addr))),
                             IRTemp_INVALID);
                  break;
               case 1:
                  putDRegI64(rD + r, unop(Iop_Dup16x4,
                                          loadLE(Ity_I16, mkexpr(addr))),
                             IRTemp_INVALID);
                  break;
               case 2:
                  putDRegI64(rD + r, unop(Iop_Dup32x2,
                                          loadLE(Ity_I32, mkexpr(addr))),
                             IRTemp_INVALID);
                  break;
               default:
                  vassert(0);
            }
            /* Remaining structure elements (i = 1 .. N), each one
               element further along in memory. */
            for (i = 1; i <= N; i++) {
               switch (size) {
                  case 0:
                     putDRegI64(rD + r + i * inc,
                                unop(Iop_Dup8x8,
                                     loadLE(Ity_I8, binop(Iop_Add32,
                                                          mkexpr(addr),
                                                          mkU32(i * 1)))),
                                IRTemp_INVALID);
                     break;
                  case 1:
                     putDRegI64(rD + r + i * inc,
                                unop(Iop_Dup16x4,
                                     loadLE(Ity_I16, binop(Iop_Add32,
                                                           mkexpr(addr),
                                                           mkU32(i * 2)))),
                                IRTemp_INVALID);
                     break;
                  case 2:
                     putDRegI64(rD + r + i * inc,
                                unop(Iop_Dup32x2,
                                     loadLE(Ity_I32, binop(Iop_Add32,
                                                           mkexpr(addr),
                                                           mkU32(i * 4)))),
                                IRTemp_INVALID);
                     break;
                  default:
                     vassert(0);
               }
            }
         }
         DIP("vld%u.%d {", N + 1, 8 << size);
         for (r = 0; r < regs; r++) {
            for (i = 0; i <= N; i++) {
               if (i || r)
                  DIP(", ");
               DIP("d%u[]", rD + r + i * inc);
            }
         }
         DIP("}, [r%u]", rN);
         if (rM != 13 && rM != 15) {
            DIP(", r%u\n", rM);
         } else {
            DIP("%s\n", (rM != 15) ? "!" : "");
         }
      }
      /* Writeback.  We're uncond here, so no condT-ing.
         Shared by cases (1) and (2).  rM == 15 means no writeback;
         rM == 13 means advance Rn by the transfer size; any other rM
         means advance Rn by Rm. */
      if (rM != 15) {
         if (rM == 13) {
            IRExpr* e = binop(Iop_Add32,
                              mkexpr(initialRn),
                              mkU32((1 << size) * (N + 1)));
            if (isT)
               putIRegT(rN, e, IRTemp_INVALID);
            else
               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
         } else {
            IRExpr* e = binop(Iop_Add32,
                              mkexpr(initialRn),
                              mkexpr(initialRm));
            if (isT)
               putIRegT(rN, e, IRTemp_INVALID);
            else
               putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
         }
      }
      return True;
   } else {
      /* ------------ Case (3) ------------
         VSTn / VLDn (multiple n-element structures) */
      inc = (fB & 1) + 1;

      if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
          || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2         inc = 1  regs = 3
          || fB == BITS4(0,1,1,1)    // Dd                     inc = 2  regs = 1
          || fB == BITS4(1,0,1,0)) { // Dd, Dd+1               inc = 1  regs = 2
         N = 0; // VLD1/VST1.  'inc' does not appear to have any
                // meaning for the VLD1/VST1 cases.  'regs' is the number of
                // registers involved.
         if (rD + regs > 32) return False;
      }
      else
      if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
          || fB == BITS4(1,0,0,0)    // Dd, Dd+1               inc=1  regs = 1
          || fB == BITS4(1,0,0,1)) { // Dd, Dd+2               inc=2  regs = 1
         N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
         if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
         if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
         if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
      } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
         N = 2; // VLD3/VST3
         if (inc == 1 && rD + 2 >= 32) return False;
         if (inc == 2 && rD + 4 >= 32) return False;
      } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
         N = 3; // VLD4/VST4
         if (inc == 1 && rD + 3 >= 32) return False;
         if (inc == 2 && rD + 6 >= 32) return False;
      } else {
         return False;
      }

      if (N == 1 && fB == BITS4(0,0,1,1)) {
         regs = 2;
      } else if (N == 0) {
         if (fB == BITS4(1,0,1,0)) {
            regs = 2;
         } else if (fB == BITS4(0,1,1,0)) {
            regs = 3;
         } else if (fB == BITS4(0,0,1,0)) {
            regs = 4;
         }
      }

      size = INSN(7,6);
      if (N == 0 && size == 3)
         size = 2;
      if (size == 3)
         return False;

      // go uncond
      if (condT != IRTemp_INVALID)
         mk_skip_over_T32_if_cond_is_false(condT);
      // now uncond

      IRTemp addr = newTemp(Ity_I32);
      assign(addr, mkexpr(initialRn));

      if (N == 0 /* No interleaving -- VLD1/VST1 */) {
         UInt r;
         vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
         /* inc has no relevance here */
         for (r = 0; r < regs; r++) {
            if (bL)
               putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
            else
               storeLE(mkexpr(addr), getDRegI64(rD+r));
            /* Rebind 'addr' to a fresh temp 8 bytes further on. */
            IRTemp tmp = newTemp(Ity_I32);
            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
            addr = tmp;
         }
      }
      else
      if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
         vassert( (regs == 1 && (inc == 1 || inc == 2))
                  || (regs == 2 && inc == 2) );
         // Make 'nregs' be the number of registers and 'regstep'
         // equal the actual register-step.  The ARM encoding, using 'regs'
         // and 'inc', is bizarre.  After this, we have:
         //    Dd, Dd+1              regs = 1, inc = 1,   nregs = 2, regstep = 1
         //    Dd, Dd+2              regs = 1, inc = 2,   nregs = 2, regstep = 2
         //    Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2,   nregs = 4, regstep = 1
         UInt nregs = 2;
         UInt regstep = 1;
         if (regs == 1 && inc == 1) {
            /* nothing */
         } else if (regs == 1 && inc == 2) {
            regstep = 2;
         } else if (regs == 2 && inc == 2) {
            nregs = 4;
         } else {
            vassert(0);
         }
         // 'a' is address,
         // 'di' is interleaved data, 'du' is uninterleaved data
         if (nregs == 2) {
            IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
            IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
            IRTemp di0 = newTemp(Ity_I64);
            IRTemp di1 = newTemp(Ity_I64);
            IRTemp du0 = newTemp(Ity_I64);
            IRTemp du1 = newTemp(Ity_I64);
            if (bL) {
               assign(di0, loadLE(Ity_I64, a0));
               assign(di1, loadLE(Ity_I64, a1));
               math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
            } else {
               assign(du0, getDRegI64(rD + 0 * regstep));
               assign(du1, getDRegI64(rD + 1 * regstep));
               math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
               storeLE(a0, mkexpr(di0));
               storeLE(a1, mkexpr(di1));
            }
            IRTemp tmp = newTemp(Ity_I32);
            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
            addr = tmp;
         } else {
            vassert(nregs == 4);
            vassert(regstep == 1);
            IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
            IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
            IRExpr* a2 = binop(Iop_Add32, mkexpr(addr), mkU32(16));
            IRExpr* a3 = binop(Iop_Add32, mkexpr(addr), mkU32(24));
            IRTemp di0 = newTemp(Ity_I64);
            IRTemp di1 = newTemp(Ity_I64);
            IRTemp di2 = newTemp(Ity_I64);
            IRTemp di3 = newTemp(Ity_I64);
            IRTemp du0 = newTemp(Ity_I64);
            IRTemp du1 = newTemp(Ity_I64);
            IRTemp du2 = newTemp(Ity_I64);
            IRTemp du3 = newTemp(Ity_I64);
            if (bL) {
               assign(di0, loadLE(Ity_I64, a0));
               assign(di1, loadLE(Ity_I64, a1));
               assign(di2, loadLE(Ity_I64, a2));
               assign(di3, loadLE(Ity_I64, a3));
               // Note spooky interleaving: du0, du2, di0, di1 etc
               math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
               math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
               putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
               putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
               putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
               putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
            } else {
               assign(du0, getDRegI64(rD + 0 * regstep));
               assign(du1, getDRegI64(rD + 1 * regstep));
               assign(du2, getDRegI64(rD + 2 * regstep));
               assign(du3, getDRegI64(rD + 3 * regstep));
               // Note spooky interleaving: du0, du2, di0, di1 etc
               math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
               math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
               storeLE(a0, mkexpr(di0));
               storeLE(a1, mkexpr(di1));
               storeLE(a2, mkexpr(di2));
               storeLE(a3, mkexpr(di3));
            }
            IRTemp tmp = newTemp(Ity_I32);
            assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
            addr = tmp;
         }
      }
      else
      if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
         // Dd, Dd+1, Dd+2   regs = 1, inc = 1
         // Dd, Dd+2, Dd+4   regs = 1, inc = 2
         vassert(regs == 1 && (inc == 1 || inc == 2));
         IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
         IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
         IRExpr* a2 = binop(Iop_Add32, mkexpr(addr), mkU32(16));
         IRTemp di0 = newTemp(Ity_I64);
         IRTemp di1 = newTemp(Ity_I64);
         IRTemp di2 = newTemp(Ity_I64);
         IRTemp du0 = newTemp(Ity_I64);
         IRTemp du1 = newTemp(Ity_I64);
         IRTemp du2 = newTemp(Ity_I64);
         if (bL) {
            assign(di0, loadLE(Ity_I64, a0));
            assign(di1, loadLE(Ity_I64, a1));
            assign(di2, loadLE(Ity_I64, a2));
            math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
         } else {
            assign(du0, getDRegI64(rD + 0 * inc));
            assign(du1, getDRegI64(rD + 1 * inc));
            assign(du2, getDRegI64(rD + 2 * inc));
            math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
            storeLE(a0, mkexpr(di0));
            storeLE(a1, mkexpr(di1));
            storeLE(a2, mkexpr(di2));
         }
         IRTemp tmp = newTemp(Ity_I32);
         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
         addr = tmp;
      }
      else
      if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
         // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
         // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
         vassert(regs == 1 && (inc == 1 || inc == 2));
         IRExpr* a0 = binop(Iop_Add32, mkexpr(addr), mkU32(0));
         IRExpr* a1 = binop(Iop_Add32, mkexpr(addr), mkU32(8));
         IRExpr* a2 = binop(Iop_Add32, mkexpr(addr), mkU32(16));
         IRExpr* a3 = binop(Iop_Add32, mkexpr(addr), mkU32(24));
         IRTemp di0 = newTemp(Ity_I64);
         IRTemp di1 = newTemp(Ity_I64);
         IRTemp di2 = newTemp(Ity_I64);
         IRTemp di3 = newTemp(Ity_I64);
         IRTemp du0 = newTemp(Ity_I64);
         IRTemp du1 = newTemp(Ity_I64);
         IRTemp du2 = newTemp(Ity_I64);
         IRTemp du3 = newTemp(Ity_I64);
         if (bL) {
            assign(di0, loadLE(Ity_I64, a0));
            assign(di1, loadLE(Ity_I64, a1));
            assign(di2, loadLE(Ity_I64, a2));
            assign(di3, loadLE(Ity_I64, a3));
            math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
                                di0, di1, di2, di3, 1 << size);
            putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
            putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
            putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
            putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
         } else {
            assign(du0, getDRegI64(rD + 0 * inc));
            assign(du1, getDRegI64(rD + 1 * inc));
            assign(du2, getDRegI64(rD + 2 * inc));
            assign(du3, getDRegI64(rD + 3 * inc));
            math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
                              du0, du1, du2, du3, 1 << size);
            storeLE(a0, mkexpr(di0));
            storeLE(a1, mkexpr(di1));
            storeLE(a2, mkexpr(di2));
            storeLE(a3, mkexpr(di3));
         }
         IRTemp tmp = newTemp(Ity_I32);
         assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
         addr = tmp;
      }
      else {
         vassert(0);
      }

      /* Writeback.  Same rM convention as cases (1)/(2): 15 = none,
         13 = advance by total bytes transferred, else advance by Rm. */
      if (rM != 15) {
         IRExpr* e;
         if (rM == 13) {
            e = binop(Iop_Add32, mkexpr(initialRn),
                                 mkU32(8 * (N + 1) * regs));
         } else {
            e = binop(Iop_Add32, mkexpr(initialRn),
                                 mkexpr(initialRm));
         }
         if (isT)
            putIRegT(rN, e, IRTemp_INVALID);
         else
            putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
      }

      DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
      if ((inc == 1 && regs * (N + 1) > 1)
          || (inc == 2 && regs > 1 && N > 0)) {
         DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
      } else {
         UInt r;
         for (r = 0; r < regs; r++) {
            for (i = 0; i <= N; i++) {
               if (i || r)
                  DIP(", ");
               DIP("d%u", rD + r + i * inc);
            }
         }
      }
      DIP("}, [r%u]", rN);
      if (rM != 13 && rM != 15) {
         DIP(", r%u\n", rM);
      } else {
         DIP("%s\n", (rM != 15) ? "!" : "");
      }
      return True;
   }
#  undef INSN
}
8870 /*------------------------------------------------------------*/
8871 /*--- NEON, top level control ---*/
8872 /*------------------------------------------------------------*/
8874 /* Both ARM and Thumb */
8876 /* Translate a NEON instruction. If successful, returns
8877 True and *dres may or may not be updated. If failure, returns
8878 False and doesn't change *dres nor create any IR.
8880 The Thumb and ARM encodings are similar for the 24 bottom bits, but
8881 the top 8 bits are slightly different. In both cases, the caller
8882 must pass the entire 32 bits. Callers may pass any instruction;
8883 this ignores non-NEON ones.
8885 Caller must supply an IRTemp 'condT' holding the gating condition,
8886 or IRTemp_INVALID indicating the insn is always executed. In ARM
8887 code, this must always be IRTemp_INVALID because NEON insns are
8888 unconditional for ARM.
8890 Finally, the caller must indicate whether this occurs in ARM or in
8891 Thumb code.
8893 This only handles NEON for ARMv7 and below. The NEON extensions
8894 for v8 are handled by decode_V8_instruction.
8896 static Bool decode_NEON_instruction_ARMv7_and_below (
8897 /*MOD*/DisResult* dres,
8898 UInt insn32,
8899 IRTemp condT,
8900 Bool isT
8903 # define INSN(_bMax,_bMin) SLICE_UInt(insn32, (_bMax), (_bMin))
8905 /* There are two kinds of instruction to deal with: load/store and
8906 data processing. In each case, in ARM mode we merely identify
8907 the kind, and pass it on to the relevant sub-handler. In Thumb
8908 mode we identify the kind, swizzle the bits around to make it
8909 have the same encoding as in ARM, and hand it on to the
8910 sub-handler.
8913 /* In ARM mode, NEON instructions can't be conditional. */
8914 if (!isT)
8915 vassert(condT == IRTemp_INVALID);
8917 /* Data processing:
8918 Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
8919 ARM: 1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
8921 if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
8922 // ARM, DP
8923 return dis_neon_data_processing(INSN(31,0), condT);
8925 if (isT && INSN(31,29) == BITS3(1,1,1)
8926 && INSN(27,24) == BITS4(1,1,1,1)) {
8927 // Thumb, DP
8928 UInt reformatted = INSN(23,0);
8929 reformatted |= (((UInt)INSN(28,28)) << 24); // U bit
8930 reformatted |= (((UInt)BITS7(1,1,1,1,0,0,1)) << 25);
8931 return dis_neon_data_processing(reformatted, condT);
8934 /* Load/store:
8935 Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
8936 ARM: 1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
8938 if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
8939 // ARM, memory
8940 return dis_neon_load_or_store(INSN(31,0), isT, condT);
8942 if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
8943 UInt reformatted = INSN(23,0);
8944 reformatted |= (((UInt)BITS8(1,1,1,1,0,1,0,0)) << 24);
8945 return dis_neon_load_or_store(reformatted, isT, condT);
8948 /* Doesn't match. */
8949 return False;
8951 # undef INSN
8955 /*------------------------------------------------------------*/
8956 /*--- V6 MEDIA instructions ---*/
8957 /*------------------------------------------------------------*/
8959 /* Both ARM and Thumb */
8961 /* Translate a V6 media instruction. If successful, returns
8962 True and *dres may or may not be updated. If failure, returns
8963 False and doesn't change *dres nor create any IR.
8965 The Thumb and ARM encodings are completely different. In Thumb
8966 mode, the caller must pass the entire 32 bits. In ARM mode it must
8967 pass the lower 28 bits. Apart from that, callers may pass any
8968 instruction; this function ignores anything it doesn't recognise.
8970 Caller must supply an IRTemp 'condT' holding the gating condition,
8971 or IRTemp_INVALID indicating the insn is always executed.
8973 Caller must also supply an ARMCondcode 'conq'. This is only used
8974 for debug printing, no other purpose. For ARM, this is simply the
8975 top 4 bits of the original instruction. For Thumb, the condition
8976 is not (really) known until run time, and so ARMCondAL should be
8977 passed, only so that printing of these instructions does not show
8978 any condition.
8980 Finally, the caller must indicate whether this occurs in ARM or in
8981 Thumb code.
8983 static Bool decode_V6MEDIA_instruction (
8984 /*MOD*/DisResult* dres,
8985 UInt insnv6m,
8986 IRTemp condT,
8987 ARMCondcode conq,
8988 Bool isT
8991 # define INSNA(_bMax,_bMin) SLICE_UInt(insnv6m, (_bMax), (_bMin))
8992 # define INSNT0(_bMax,_bMin) SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
8993 (_bMax), (_bMin) )
8994 # define INSNT1(_bMax,_bMin) SLICE_UInt( ((insnv6m >> 0) & 0xFFFF), \
8995 (_bMax), (_bMin) )
8996 HChar dis_buf[128];
8997 dis_buf[0] = 0;
8999 if (isT) {
9000 vassert(conq == ARMCondAL);
9001 } else {
9002 vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
9003 vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
9006 /* ----------- smulbb, smulbt, smultb, smultt ----------- */
9008 UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
9009 Bool gate = False;
9011 if (isT) {
9012 if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
9013 && INSNT1(7,6) == BITS2(0,0)) {
9014 regD = INSNT1(11,8);
9015 regM = INSNT1(3,0);
9016 regN = INSNT0(3,0);
9017 bitM = INSNT1(4,4);
9018 bitN = INSNT1(5,5);
9019 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9020 gate = True;
9022 } else {
9023 if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
9024 BITS4(0,0,0,0) == INSNA(15,12) &&
9025 BITS4(1,0,0,0) == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
9026 regD = INSNA(19,16);
9027 regM = INSNA(11,8);
9028 regN = INSNA(3,0);
9029 bitM = INSNA(6,6);
9030 bitN = INSNA(5,5);
9031 if (regD != 15 && regN != 15 && regM != 15)
9032 gate = True;
9036 if (gate) {
9037 IRTemp srcN = newTemp(Ity_I32);
9038 IRTemp srcM = newTemp(Ity_I32);
9039 IRTemp res = newTemp(Ity_I32);
9041 assign( srcN, binop(Iop_Sar32,
9042 binop(Iop_Shl32,
9043 isT ? getIRegT(regN) : getIRegA(regN),
9044 mkU8(bitN ? 0 : 16)), mkU8(16)) );
9045 assign( srcM, binop(Iop_Sar32,
9046 binop(Iop_Shl32,
9047 isT ? getIRegT(regM) : getIRegA(regM),
9048 mkU8(bitM ? 0 : 16)), mkU8(16)) );
9049 assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
9051 if (isT)
9052 putIRegT( regD, mkexpr(res), condT );
9053 else
9054 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9056 DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
9057 nCC(conq), regD, regN, regM );
9058 return True;
9060 /* fall through */
9063 /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
9064 /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
9066 UInt regD = 99, regN = 99, regM = 99, bitM = 0;
9067 Bool gate = False;
9069 if (isT) {
9070 if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
9071 && INSNT1(7,5) == BITS3(0,0,0)) {
9072 regN = INSNT0(3,0);
9073 regD = INSNT1(11,8);
9074 regM = INSNT1(3,0);
9075 bitM = INSNT1(4,4);
9076 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9077 gate = True;
9079 } else {
9080 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
9081 INSNA(15,12) == BITS4(0,0,0,0) &&
9082 (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
9083 regD = INSNA(19,16);
9084 regN = INSNA(3,0);
9085 regM = INSNA(11,8);
9086 bitM = INSNA(6,6);
9087 if (regD != 15 && regN != 15 && regM != 15)
9088 gate = True;
9092 if (gate) {
9093 IRTemp irt_prod = newTemp(Ity_I64);
9095 assign( irt_prod,
9096 binop(Iop_MullS32,
9097 isT ? getIRegT(regN) : getIRegA(regN),
9098 binop(Iop_Sar32,
9099 binop(Iop_Shl32,
9100 isT ? getIRegT(regM) : getIRegA(regM),
9101 mkU8(bitM ? 0 : 16)),
9102 mkU8(16))) );
9104 IRExpr* ire_result = binop(Iop_Or32,
9105 binop( Iop_Shl32,
9106 unop(Iop_64HIto32, mkexpr(irt_prod)),
9107 mkU8(16) ),
9108 binop( Iop_Shr32,
9109 unop(Iop_64to32, mkexpr(irt_prod)),
9110 mkU8(16) ) );
9112 if (isT)
9113 putIRegT( regD, ire_result, condT );
9114 else
9115 putIRegA( regD, ire_result, condT, Ijk_Boring );
9117 DIP("smulw%c%s r%u, r%u, r%u\n",
9118 bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
9119 return True;
9121 /* fall through */
9124 /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
9125 /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
9127 UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
9128 Bool tbform = False;
9129 Bool gate = False;
9131 if (isT) {
9132 if (INSNT0(15,4) == 0xEAC
9133 && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
9134 regN = INSNT0(3,0);
9135 regD = INSNT1(11,8);
9136 regM = INSNT1(3,0);
9137 imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9138 shift_type = (INSNT1(5,5) << 1) | 0;
9139 tbform = (INSNT1(5,5) == 0) ? False : True;
9140 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9141 gate = True;
9143 } else {
9144 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
9145 INSNA(5,4) == BITS2(0,1) /* &&
9146 (INSNA(6,6) == 0 || INSNA(6,6) == 1)
9147 This last bit with INSNA(6,6) is correct, but gcc 8 complains
9148 (correctly) that it is always true. So I commented it out
9149 to keep gcc quiet. */ ) {
9150 regD = INSNA(15,12);
9151 regN = INSNA(19,16);
9152 regM = INSNA(3,0);
9153 imm5 = INSNA(11,7);
9154 shift_type = (INSNA(6,6) << 1) | 0;
9155 tbform = (INSNA(6,6) == 0) ? False : True;
9156 if (regD != 15 && regN != 15 && regM != 15)
9157 gate = True;
9161 if (gate) {
9162 IRTemp irt_regM = newTemp(Ity_I32);
9163 IRTemp irt_regM_shift = newTemp(Ity_I32);
9164 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9165 compute_result_and_C_after_shift_by_imm5(
9166 dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
9168 UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
9169 IRExpr* ire_result
9170 = binop( Iop_Or32,
9171 binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
9172 binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
9173 unop(Iop_Not32, mkU32(mask))) );
9175 if (isT)
9176 putIRegT( regD, ire_result, condT );
9177 else
9178 putIRegA( regD, ire_result, condT, Ijk_Boring );
9180 DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
9181 nCC(conq), regD, regN, regM, dis_buf );
9183 return True;
9185 /* fall through */
9188 /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9190 UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9191 Bool gate = False;
9193 if (isT) {
9194 if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
9195 && INSNT0(4,4) == 0
9196 && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9197 regD = INSNT1(11,8);
9198 regN = INSNT0(3,0);
9199 shift_type = (INSNT0(5,5) << 1) | 0;
9200 imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9201 sat_imm = INSNT1(4,0);
9202 if (!isBadRegT(regD) && !isBadRegT(regN))
9203 gate = True;
9204 if (shift_type == BITS2(1,0) && imm5 == 0)
9205 gate = False;
9207 } else {
9208 if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
9209 INSNA(5,4) == BITS2(0,1)) {
9210 regD = INSNA(15,12);
9211 regN = INSNA(3,0);
9212 shift_type = (INSNA(6,6) << 1) | 0;
9213 imm5 = INSNA(11,7);
9214 sat_imm = INSNA(20,16);
9215 if (regD != 15 && regN != 15)
9216 gate = True;
9220 if (gate) {
9221 IRTemp irt_regN = newTemp(Ity_I32);
9222 IRTemp irt_regN_shift = newTemp(Ity_I32);
9223 IRTemp irt_sat_Q = newTemp(Ity_I32);
9224 IRTemp irt_result = newTemp(Ity_I32);
9226 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9227 compute_result_and_C_after_shift_by_imm5(
9228 dis_buf, &irt_regN_shift, NULL,
9229 irt_regN, shift_type, imm5, regN );
9231 armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
9232 or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9234 if (isT)
9235 putIRegT( regD, mkexpr(irt_result), condT );
9236 else
9237 putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9239 DIP("usat%s r%u, #0x%04x, %s\n",
9240 nCC(conq), regD, imm5, dis_buf);
9241 return True;
9243 /* fall through */
9246 /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9248 UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9249 Bool gate = False;
9251 if (isT) {
9252 if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9253 && INSNT0(4,4) == 0
9254 && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9255 regD = INSNT1(11,8);
9256 regN = INSNT0(3,0);
9257 shift_type = (INSNT0(5,5) << 1) | 0;
9258 imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9259 sat_imm = INSNT1(4,0) + 1;
9260 if (!isBadRegT(regD) && !isBadRegT(regN))
9261 gate = True;
9262 if (shift_type == BITS2(1,0) && imm5 == 0)
9263 gate = False;
9265 } else {
9266 if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
9267 INSNA(5,4) == BITS2(0,1)) {
9268 regD = INSNA(15,12);
9269 regN = INSNA(3,0);
9270 shift_type = (INSNA(6,6) << 1) | 0;
9271 imm5 = INSNA(11,7);
9272 sat_imm = INSNA(20,16) + 1;
9273 if (regD != 15 && regN != 15)
9274 gate = True;
9278 if (gate) {
9279 IRTemp irt_regN = newTemp(Ity_I32);
9280 IRTemp irt_regN_shift = newTemp(Ity_I32);
9281 IRTemp irt_sat_Q = newTemp(Ity_I32);
9282 IRTemp irt_result = newTemp(Ity_I32);
9284 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9285 compute_result_and_C_after_shift_by_imm5(
9286 dis_buf, &irt_regN_shift, NULL,
9287 irt_regN, shift_type, imm5, regN );
9289 armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
9290 or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9292 if (isT)
9293 putIRegT( regD, mkexpr(irt_result), condT );
9294 else
9295 putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9297 DIP( "ssat%s r%u, #0x%04x, %s\n",
9298 nCC(conq), regD, imm5, dis_buf);
9299 return True;
9301 /* fall through */
9304 /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
9306 UInt regD = 99, regN = 99, sat_imm = 99;
9307 Bool gate = False;
9309 if (isT) {
9310 if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9311 && INSNT0(5,4) == BITS2(1,0)
9312 && INSNT1(15,12) == BITS4(0,0,0,0)
9313 && INSNT1(7,4) == BITS4(0,0,0,0)) {
9314 regD = INSNT1(11,8);
9315 regN = INSNT0(3,0);
9316 sat_imm = INSNT1(3,0) + 1;
9317 if (!isBadRegT(regD) && !isBadRegT(regN))
9318 gate = True;
9320 } else {
9321 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
9322 INSNA(11,4) == BITS8(1,1,1,1,0,0,1,1)) {
9323 regD = INSNA(15,12);
9324 regN = INSNA(3,0);
9325 sat_imm = INSNA(19,16) + 1;
9326 if (regD != 15 && regN != 15)
9327 gate = True;
9331 if (gate) {
9332 IRTemp irt_regN = newTemp(Ity_I32);
9333 IRTemp irt_regN_lo = newTemp(Ity_I32);
9334 IRTemp irt_regN_hi = newTemp(Ity_I32);
9335 IRTemp irt_Q_lo = newTemp(Ity_I32);
9336 IRTemp irt_Q_hi = newTemp(Ity_I32);
9337 IRTemp irt_res_lo = newTemp(Ity_I32);
9338 IRTemp irt_res_hi = newTemp(Ity_I32);
9340 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9341 assign( irt_regN_lo,
9342 binop( Iop_Sar32,
9343 binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9344 mkU8(16)) );
9345 assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9347 armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
9348 or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9350 armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
9351 or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9353 IRExpr* ire_result
9354 = binop(Iop_Or32,
9355 binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
9356 binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
9357 if (isT)
9358 putIRegT( regD, ire_result, condT );
9359 else
9360 putIRegA( regD, ire_result, condT, Ijk_Boring );
9362 DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9363 return True;
9365 /* fall through */
9368 /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
9370 UInt regD = 99, regN = 99, sat_imm = 99;
9371 Bool gate = False;
9373 if (isT) {
9374 if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
9375 regN = INSNT0(3,0);
9376 regD = INSNT1(11,8);
9377 sat_imm = INSNT1(3,0);
9378 if (!isBadRegT(regD) && !isBadRegT(regN))
9379 gate = True;
9381 } else {
9382 if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
9383 INSNA(11,8) == BITS4(1,1,1,1) &&
9384 INSNA(7,4) == BITS4(0,0,1,1)) {
9385 regD = INSNA(15,12);
9386 regN = INSNA(3,0);
9387 sat_imm = INSNA(19,16);
9388 if (regD != 15 && regN != 15)
9389 gate = True;
9393 if (gate) {
9394 IRTemp irt_regN = newTemp(Ity_I32);
9395 IRTemp irt_regN_lo = newTemp(Ity_I32);
9396 IRTemp irt_regN_hi = newTemp(Ity_I32);
9397 IRTemp irt_Q_lo = newTemp(Ity_I32);
9398 IRTemp irt_Q_hi = newTemp(Ity_I32);
9399 IRTemp irt_res_lo = newTemp(Ity_I32);
9400 IRTemp irt_res_hi = newTemp(Ity_I32);
9402 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9403 assign( irt_regN_lo, binop( Iop_Sar32,
9404 binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9405 mkU8(16)) );
9406 assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9408 armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
9409 or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9411 armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
9412 or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9414 IRExpr* ire_result = binop( Iop_Or32,
9415 binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
9416 mkexpr(irt_res_lo) );
9418 if (isT)
9419 putIRegT( regD, ire_result, condT );
9420 else
9421 putIRegA( regD, ire_result, condT, Ijk_Boring );
9423 DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9424 return True;
9426 /* fall through */
9429 /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9431 UInt regD = 99, regN = 99, regM = 99;
9432 Bool gate = False;
9434 if (isT) {
9435 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9436 regN = INSNT0(3,0);
9437 regD = INSNT1(11,8);
9438 regM = INSNT1(3,0);
9439 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9440 gate = True;
9442 } else {
9443 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9444 INSNA(11,8) == BITS4(1,1,1,1) &&
9445 INSNA(7,4) == BITS4(0,0,0,1)) {
9446 regD = INSNA(15,12);
9447 regN = INSNA(19,16);
9448 regM = INSNA(3,0);
9449 if (regD != 15 && regN != 15 && regM != 15)
9450 gate = True;
9454 if (gate) {
9455 IRTemp rNt = newTemp(Ity_I32);
9456 IRTemp rMt = newTemp(Ity_I32);
9457 IRTemp res = newTemp(Ity_I32);
9458 IRTemp reso = newTemp(Ity_I32);
9460 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9461 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9463 assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9464 if (isT)
9465 putIRegT( regD, mkexpr(res), condT );
9466 else
9467 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9469 assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
9470 set_GE_32_10_from_bits_31_15(reso, condT);
9472 DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9473 return True;
9475 /* fall through */
9478 /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9480 UInt regD = 99, regN = 99, regM = 99;
9481 Bool gate = False;
9483 if (isT) {
9484 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9485 regN = INSNT0(3,0);
9486 regD = INSNT1(11,8);
9487 regM = INSNT1(3,0);
9488 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9489 gate = True;
9491 } else {
9492 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9493 INSNA(11,8) == BITS4(1,1,1,1) &&
9494 INSNA(7,4) == BITS4(0,0,0,1)) {
9495 regD = INSNA(15,12);
9496 regN = INSNA(19,16);
9497 regM = INSNA(3,0);
9498 if (regD != 15 && regN != 15 && regM != 15)
9499 gate = True;
9503 if (gate) {
9504 IRTemp rNt = newTemp(Ity_I32);
9505 IRTemp rMt = newTemp(Ity_I32);
9506 IRTemp res = newTemp(Ity_I32);
9507 IRTemp reso = newTemp(Ity_I32);
9509 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9510 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9512 assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9513 if (isT)
9514 putIRegT( regD, mkexpr(res), condT );
9515 else
9516 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9518 assign(reso, unop(Iop_Not32,
9519 binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
9520 set_GE_32_10_from_bits_31_15(reso, condT);
9522 DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9523 return True;
9525 /* fall through */
9528 /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
9530 UInt regD = 99, regN = 99, regM = 99;
9531 Bool gate = False;
9533 if (isT) {
9534 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9535 regN = INSNT0(3,0);
9536 regD = INSNT1(11,8);
9537 regM = INSNT1(3,0);
9538 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9539 gate = True;
9541 } else {
9542 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9543 INSNA(11,8) == BITS4(1,1,1,1) &&
9544 INSNA(7,4) == BITS4(0,1,1,1)) {
9545 regD = INSNA(15,12);
9546 regN = INSNA(19,16);
9547 regM = INSNA(3,0);
9548 if (regD != 15 && regN != 15 && regM != 15)
9549 gate = True;
9553 if (gate) {
9554 IRTemp rNt = newTemp(Ity_I32);
9555 IRTemp rMt = newTemp(Ity_I32);
9556 IRTemp res = newTemp(Ity_I32);
9557 IRTemp reso = newTemp(Ity_I32);
9559 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9560 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9562 assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9563 if (isT)
9564 putIRegT( regD, mkexpr(res), condT );
9565 else
9566 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9568 assign(reso, unop(Iop_Not32,
9569 binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
9570 set_GE_32_10_from_bits_31_15(reso, condT);
9572 DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9573 return True;
9575 /* fall through */
9578 /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
9580 UInt regD = 99, regN = 99, regM = 99;
9581 Bool gate = False;
9583 if (isT) {
9584 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9585 regN = INSNT0(3,0);
9586 regD = INSNT1(11,8);
9587 regM = INSNT1(3,0);
9588 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9589 gate = True;
9591 } else {
9592 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9593 INSNA(11,8) == BITS4(1,1,1,1) &&
9594 INSNA(7,4) == BITS4(0,1,1,1)) {
9595 regD = INSNA(15,12);
9596 regN = INSNA(19,16);
9597 regM = INSNA(3,0);
9598 if (regD != 15 && regN != 15 && regM != 15)
9599 gate = True;
9603 if (gate) {
9604 IRTemp rNt = newTemp(Ity_I32);
9605 IRTemp rMt = newTemp(Ity_I32);
9606 IRTemp res = newTemp(Ity_I32);
9607 IRTemp reso = newTemp(Ity_I32);
9609 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9610 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9612 assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9613 if (isT)
9614 putIRegT( regD, mkexpr(res), condT );
9615 else
9616 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9618 assign(reso, unop(Iop_Not32,
9619 binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
9620 set_GE_32_10_from_bits_31_15(reso, condT);
9622 DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9623 return True;
9625 /* fall through */
9628 /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
9630 UInt regD = 99, regN = 99, regM = 99;
9631 Bool gate = False;
9633 if (isT) {
9634 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9635 regN = INSNT0(3,0);
9636 regD = INSNT1(11,8);
9637 regM = INSNT1(3,0);
9638 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9639 gate = True;
9641 } else {
9642 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9643 INSNA(11,8) == BITS4(1,1,1,1) &&
9644 (INSNA(7,4) == BITS4(1,0,0,1))) {
9645 regD = INSNA(15,12);
9646 regN = INSNA(19,16);
9647 regM = INSNA(3,0);
9648 if (regD != 15 && regN != 15 && regM != 15)
9649 gate = True;
9653 if (gate) {
9654 IRTemp rNt = newTemp(Ity_I32);
9655 IRTemp rMt = newTemp(Ity_I32);
9656 IRTemp res = newTemp(Ity_I32);
9657 IRTemp reso = newTemp(Ity_I32);
9659 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9660 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9662 assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9663 if (isT)
9664 putIRegT( regD, mkexpr(res), condT );
9665 else
9666 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9668 assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9669 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9671 DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9672 return True;
9674 /* fall through */
9677 /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9679 UInt regD = 99, regN = 99, regM = 99;
9680 Bool gate = False;
9682 if (isT) {
9683 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9684 regN = INSNT0(3,0);
9685 regD = INSNT1(11,8);
9686 regM = INSNT1(3,0);
9687 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9688 gate = True;
9690 } else {
9691 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9692 INSNA(11,8) == BITS4(1,1,1,1) &&
9693 (INSNA(7,4) == BITS4(1,0,0,1))) {
9694 regD = INSNA(15,12);
9695 regN = INSNA(19,16);
9696 regM = INSNA(3,0);
9697 if (regD != 15 && regN != 15 && regM != 15)
9698 gate = True;
9702 if (gate) {
9703 IRTemp rNt = newTemp(Ity_I32);
9704 IRTemp rMt = newTemp(Ity_I32);
9705 IRTemp res = newTemp(Ity_I32);
9706 IRTemp reso = newTemp(Ity_I32);
9708 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9709 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9711 assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9712 if (isT)
9713 putIRegT( regD, mkexpr(res), condT );
9714 else
9715 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9717 assign(reso, unop(Iop_Not32,
9718 binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
9719 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9721 DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9722 return True;
9724 /* fall through */
9727 /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9729 UInt regD = 99, regN = 99, regM = 99;
9730 Bool gate = False;
9732 if (isT) {
9733 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9734 regN = INSNT0(3,0);
9735 regD = INSNT1(11,8);
9736 regM = INSNT1(3,0);
9737 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9738 gate = True;
9740 } else {
9741 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9742 INSNA(11,8) == BITS4(1,1,1,1) &&
9743 (INSNA(7,4) == BITS4(1,1,1,1))) {
9744 regD = INSNA(15,12);
9745 regN = INSNA(19,16);
9746 regM = INSNA(3,0);
9747 if (regD != 15 && regN != 15 && regM != 15)
9748 gate = True;
9752 if (gate) {
9753 IRTemp rNt = newTemp(Ity_I32);
9754 IRTemp rMt = newTemp(Ity_I32);
9755 IRTemp res = newTemp(Ity_I32);
9756 IRTemp reso = newTemp(Ity_I32);
9758 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9759 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9761 assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9762 if (isT)
9763 putIRegT( regD, mkexpr(res), condT );
9764 else
9765 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9767 assign(reso, unop(Iop_Not32,
9768 binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
9769 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9771 DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9772 return True;
9774 /* fall through */
9777 /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9779 UInt regD = 99, regN = 99, regM = 99;
9780 Bool gate = False;
9782 if (isT) {
9783 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9784 regN = INSNT0(3,0);
9785 regD = INSNT1(11,8);
9786 regM = INSNT1(3,0);
9787 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9788 gate = True;
9790 } else {
9791 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9792 INSNA(11,8) == BITS4(1,1,1,1) &&
9793 INSNA(7,4) == BITS4(1,1,1,1)) {
9794 regD = INSNA(15,12);
9795 regN = INSNA(19,16);
9796 regM = INSNA(3,0);
9797 if (regD != 15 && regN != 15 && regM != 15)
9798 gate = True;
9802 if (gate) {
9803 IRTemp rNt = newTemp(Ity_I32);
9804 IRTemp rMt = newTemp(Ity_I32);
9805 IRTemp res = newTemp(Ity_I32);
9806 IRTemp reso = newTemp(Ity_I32);
9808 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9809 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9811 assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9812 if (isT)
9813 putIRegT( regD, mkexpr(res), condT );
9814 else
9815 putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9817 assign(reso, unop(Iop_Not32,
9818 binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
9819 set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9821 DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9822 return True;
9824 /* fall through */
9827 /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9829 UInt regD = 99, regN = 99, regM = 99;
9830 Bool gate = False;
9832 if (isT) {
9833 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9834 regN = INSNT0(3,0);
9835 regD = INSNT1(11,8);
9836 regM = INSNT1(3,0);
9837 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9838 gate = True;
9840 } else {
9841 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9842 INSNA(11,8) == BITS4(1,1,1,1) &&
9843 INSNA(7,4) == BITS4(1,0,0,1)) {
9844 regD = INSNA(15,12);
9845 regN = INSNA(19,16);
9846 regM = INSNA(3,0);
9847 if (regD != 15 && regN != 15 && regM != 15)
9848 gate = True;
9852 if (gate) {
9853 IRTemp rNt = newTemp(Ity_I32);
9854 IRTemp rMt = newTemp(Ity_I32);
9855 IRTemp res_q = newTemp(Ity_I32);
9857 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9858 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9860 assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
9861 if (isT)
9862 putIRegT( regD, mkexpr(res_q), condT );
9863 else
9864 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9866 DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9867 return True;
9869 /* fall through */
9872 /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
9874 UInt regD = 99, regN = 99, regM = 99;
9875 Bool gate = False;
9877 if (isT) {
9878 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9879 regN = INSNT0(3,0);
9880 regD = INSNT1(11,8);
9881 regM = INSNT1(3,0);
9882 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9883 gate = True;
9885 } else {
9886 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9887 INSNA(11,8) == BITS4(1,1,1,1) &&
9888 INSNA(7,4) == BITS4(1,1,1,1)) {
9889 regD = INSNA(15,12);
9890 regN = INSNA(19,16);
9891 regM = INSNA(3,0);
9892 if (regD != 15 && regN != 15 && regM != 15)
9893 gate = True;
9897 if (gate) {
9898 IRTemp rNt = newTemp(Ity_I32);
9899 IRTemp rMt = newTemp(Ity_I32);
9900 IRTemp res_q = newTemp(Ity_I32);
9902 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9903 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9905 assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
9906 if (isT)
9907 putIRegT( regD, mkexpr(res_q), condT );
9908 else
9909 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9911 DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9912 return True;
9914 /* fall through */
9917 /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9919 UInt regD = 99, regN = 99, regM = 99;
9920 Bool gate = False;
9922 if (isT) {
9923 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9924 regN = INSNT0(3,0);
9925 regD = INSNT1(11,8);
9926 regM = INSNT1(3,0);
9927 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9928 gate = True;
9930 } else {
9931 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9932 INSNA(11,8) == BITS4(1,1,1,1) &&
9933 (INSNA(7,4) == BITS4(1,0,0,1))) {
9934 regD = INSNA(15,12);
9935 regN = INSNA(19,16);
9936 regM = INSNA(3,0);
9937 if (regD != 15 && regN != 15 && regM != 15)
9938 gate = True;
9942 if (gate) {
9943 IRTemp rNt = newTemp(Ity_I32);
9944 IRTemp rMt = newTemp(Ity_I32);
9945 IRTemp res_q = newTemp(Ity_I32);
9947 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9948 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9950 assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9951 if (isT)
9952 putIRegT( regD, mkexpr(res_q), condT );
9953 else
9954 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9956 DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9957 return True;
9959 /* fall through */
9962 /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9964 UInt regD = 99, regN = 99, regM = 99;
9965 Bool gate = False;
9967 if (isT) {
9968 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9969 regN = INSNT0(3,0);
9970 regD = INSNT1(11,8);
9971 regM = INSNT1(3,0);
9972 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9973 gate = True;
9975 } else {
9976 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9977 INSNA(11,8) == BITS4(1,1,1,1) &&
9978 (INSNA(7,4) == BITS4(1,1,1,1))) {
9979 regD = INSNA(15,12);
9980 regN = INSNA(19,16);
9981 regM = INSNA(3,0);
9982 if (regD != 15 && regN != 15 && regM != 15)
9983 gate = True;
9987 if (gate) {
9988 IRTemp rNt = newTemp(Ity_I32);
9989 IRTemp rMt = newTemp(Ity_I32);
9990 IRTemp res_q = newTemp(Ity_I32);
9992 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9993 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9995 assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
9996 if (isT)
9997 putIRegT( regD, mkexpr(res_q), condT );
9998 else
9999 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10001 DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10002 return True;
10004 /* fall through */
10007 /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10009 UInt regD = 99, regN = 99, regM = 99;
10010 Bool gate = False;
10012 if (isT) {
10013 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
10014 regN = INSNT0(3,0);
10015 regD = INSNT1(11,8);
10016 regM = INSNT1(3,0);
10017 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10018 gate = True;
10020 } else {
10021 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10022 INSNA(11,8) == BITS4(1,1,1,1) &&
10023 INSNA(7,4) == BITS4(1,0,0,1)) {
10024 regD = INSNA(15,12);
10025 regN = INSNA(19,16);
10026 regM = INSNA(3,0);
10027 if (regD != 15 && regN != 15 && regM != 15)
10028 gate = True;
10032 if (gate) {
10033 IRTemp rNt = newTemp(Ity_I32);
10034 IRTemp rMt = newTemp(Ity_I32);
10035 IRTemp res_q = newTemp(Ity_I32);
10037 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10038 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10040 assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
10041 if (isT)
10042 putIRegT( regD, mkexpr(res_q), condT );
10043 else
10044 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10046 DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10047 return True;
10049 /* fall through */
10052 /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
10054 UInt regD = 99, regN = 99, regM = 99;
10055 Bool gate = False;
10057 if (isT) {
10058 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
10059 regN = INSNT0(3,0);
10060 regD = INSNT1(11,8);
10061 regM = INSNT1(3,0);
10062 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10063 gate = True;
10065 } else {
10066 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10067 INSNA(11,8) == BITS4(1,1,1,1) &&
10068 INSNA(7,4) == BITS4(0,0,0,1)) {
10069 regD = INSNA(15,12);
10070 regN = INSNA(19,16);
10071 regM = INSNA(3,0);
10072 if (regD != 15 && regN != 15 && regM != 15)
10073 gate = True;
10077 if (gate) {
10078 IRTemp rNt = newTemp(Ity_I32);
10079 IRTemp rMt = newTemp(Ity_I32);
10080 IRTemp res_q = newTemp(Ity_I32);
10082 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10083 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10085 assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
10086 if (isT)
10087 putIRegT( regD, mkexpr(res_q), condT );
10088 else
10089 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10091 DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10092 return True;
10094 /* fall through */
10097 /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10099 UInt regD = 99, regN = 99, regM = 99;
10100 Bool gate = False;
10102 if (isT) {
10103 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
10104 regN = INSNT0(3,0);
10105 regD = INSNT1(11,8);
10106 regM = INSNT1(3,0);
10107 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10108 gate = True;
10110 } else {
10111 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
10112 INSNA(11,8) == BITS4(1,1,1,1) &&
10113 INSNA(7,4) == BITS4(1,0,0,1)) {
10114 regD = INSNA(15,12);
10115 regN = INSNA(19,16);
10116 regM = INSNA(3,0);
10117 if (regD != 15 && regN != 15 && regM != 15)
10118 gate = True;
10122 if (gate) {
10123 IRTemp rNt = newTemp(Ity_I32);
10124 IRTemp rMt = newTemp(Ity_I32);
10125 IRTemp res_q = newTemp(Ity_I32);
10127 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10128 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10130 assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
10131 if (isT)
10132 putIRegT( regD, mkexpr(res_q), condT );
10133 else
10134 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10136 DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10137 return True;
10139 /* fall through */
10142 /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
10144 UInt regD = 99, regN = 99, regM = 99;
10145 Bool gate = False;
10147 if (isT) {
10148 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10149 regN = INSNT0(3,0);
10150 regD = INSNT1(11,8);
10151 regM = INSNT1(3,0);
10152 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10153 gate = True;
10155 } else {
10156 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10157 INSNA(11,8) == BITS4(1,1,1,1) &&
10158 INSNA(7,4) == BITS4(0,0,0,1)) {
10159 regD = INSNA(15,12);
10160 regN = INSNA(19,16);
10161 regM = INSNA(3,0);
10162 if (regD != 15 && regN != 15 && regM != 15)
10163 gate = True;
10167 if (gate) {
10168 IRTemp rNt = newTemp(Ity_I32);
10169 IRTemp rMt = newTemp(Ity_I32);
10170 IRTemp res_q = newTemp(Ity_I32);
10172 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10173 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10175 assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
10176 if (isT)
10177 putIRegT( regD, mkexpr(res_q), condT );
10178 else
10179 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10181 DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10182 return True;
10184 /* fall through */
10187 /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
10189 UInt regD = 99, regN = 99, regM = 99;
10190 Bool gate = False;
10192 if (isT) {
10193 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10194 regN = INSNT0(3,0);
10195 regD = INSNT1(11,8);
10196 regM = INSNT1(3,0);
10197 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10198 gate = True;
10200 } else {
10201 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10202 INSNA(11,8) == BITS4(1,1,1,1) &&
10203 INSNA(7,4) == BITS4(0,1,1,1)) {
10204 regD = INSNA(15,12);
10205 regN = INSNA(19,16);
10206 regM = INSNA(3,0);
10207 if (regD != 15 && regN != 15 && regM != 15)
10208 gate = True;
10212 if (gate) {
10213 IRTemp rNt = newTemp(Ity_I32);
10214 IRTemp rMt = newTemp(Ity_I32);
10215 IRTemp res_q = newTemp(Ity_I32);
10217 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10218 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10220 assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
10221 if (isT)
10222 putIRegT( regD, mkexpr(res_q), condT );
10223 else
10224 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10226 DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10227 return True;
10229 /* fall through */
10232 /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
10233 /* note: the hardware seems to construct the result differently
10234 from wot the manual says. */
10236 UInt regD = 99, regN = 99, regM = 99;
10237 Bool gate = False;
10239 if (isT) {
10240 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10241 regN = INSNT0(3,0);
10242 regD = INSNT1(11,8);
10243 regM = INSNT1(3,0);
10244 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10245 gate = True;
10247 } else {
10248 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10249 INSNA(11,8) == BITS4(1,1,1,1) &&
10250 INSNA(7,4) == BITS4(0,1,0,1)) {
10251 regD = INSNA(15,12);
10252 regN = INSNA(19,16);
10253 regM = INSNA(3,0);
10254 if (regD != 15 && regN != 15 && regM != 15)
10255 gate = True;
10259 if (gate) {
10260 IRTemp irt_regN = newTemp(Ity_I32);
10261 IRTemp irt_regM = newTemp(Ity_I32);
10262 IRTemp irt_sum = newTemp(Ity_I32);
10263 IRTemp irt_diff = newTemp(Ity_I32);
10264 IRTemp irt_sum_res = newTemp(Ity_I32);
10265 IRTemp irt_diff_res = newTemp(Ity_I32);
10267 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10268 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10270 assign( irt_diff,
10271 binop( Iop_Sub32,
10272 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10273 binop( Iop_Sar32,
10274 binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10275 mkU8(16) ) ) );
10276 armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
10278 assign( irt_sum,
10279 binop( Iop_Add32,
10280 binop( Iop_Sar32,
10281 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10282 mkU8(16) ),
10283 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
10284 armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
10286 IRExpr* ire_result = binop( Iop_Or32,
10287 binop( Iop_Shl32, mkexpr(irt_diff_res),
10288 mkU8(16) ),
10289 binop( Iop_And32, mkexpr(irt_sum_res),
10290 mkU32(0xFFFF)) );
10292 if (isT)
10293 putIRegT( regD, ire_result, condT );
10294 else
10295 putIRegA( regD, ire_result, condT, Ijk_Boring );
10297 DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10298 return True;
10300 /* fall through */
10303 /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10305 UInt regD = 99, regN = 99, regM = 99;
10306 Bool gate = False;
10308 if (isT) {
10309 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10310 regN = INSNT0(3,0);
10311 regD = INSNT1(11,8);
10312 regM = INSNT1(3,0);
10313 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10314 gate = True;
10316 } else {
10317 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10318 INSNA(11,8) == BITS4(1,1,1,1) &&
10319 INSNA(7,4) == BITS4(0,0,1,1)) {
10320 regD = INSNA(15,12);
10321 regN = INSNA(19,16);
10322 regM = INSNA(3,0);
10323 if (regD != 15 && regN != 15 && regM != 15)
10324 gate = True;
10328 if (gate) {
10329 IRTemp irt_regN = newTemp(Ity_I32);
10330 IRTemp irt_regM = newTemp(Ity_I32);
10331 IRTemp irt_sum = newTemp(Ity_I32);
10332 IRTemp irt_diff = newTemp(Ity_I32);
10333 IRTemp irt_res_sum = newTemp(Ity_I32);
10334 IRTemp irt_res_diff = newTemp(Ity_I32);
10336 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10337 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10339 assign( irt_diff,
10340 binop( Iop_Sub32,
10341 binop( Iop_Sar32,
10342 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10343 mkU8(16) ),
10344 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10345 armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
10347 assign( irt_sum,
10348 binop( Iop_Add32,
10349 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10350 binop( Iop_Sar32,
10351 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10352 mkU8(16) ) ) );
10353 armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
10355 IRExpr* ire_result
10356 = binop( Iop_Or32,
10357 binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
10358 binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
10360 if (isT)
10361 putIRegT( regD, ire_result, condT );
10362 else
10363 putIRegA( regD, ire_result, condT, Ijk_Boring );
10365 DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10366 return True;
10368 /* fall through */
10371 /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10373 UInt regD = 99, regN = 99, regM = 99;
10374 Bool gate = False;
10376 if (isT) {
10377 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
10378 regN = INSNT0(3,0);
10379 regD = INSNT1(11,8);
10380 regM = INSNT1(3,0);
10381 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10382 gate = True;
10384 } else {
10385 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
10386 INSNA(11,8) == BITS4(1,1,1,1) &&
10387 INSNA(7,4) == BITS4(0,0,1,1)) {
10388 regD = INSNA(15,12);
10389 regN = INSNA(19,16);
10390 regM = INSNA(3,0);
10391 if (regD != 15 && regN != 15 && regM != 15)
10392 gate = True;
10396 if (gate) {
10397 IRTemp irt_regN = newTemp(Ity_I32);
10398 IRTemp irt_regM = newTemp(Ity_I32);
10399 IRTemp irt_sum = newTemp(Ity_I32);
10400 IRTemp irt_diff = newTemp(Ity_I32);
10402 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10403 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10405 assign( irt_diff,
10406 binop( Iop_Sub32,
10407 binop( Iop_Sar32,
10408 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10409 mkU8(16) ),
10410 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10412 assign( irt_sum,
10413 binop( Iop_Add32,
10414 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10415 binop( Iop_Sar32,
10416 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10417 mkU8(16) ) ) );
10419 IRExpr* ire_result
10420 = binop( Iop_Or32,
10421 binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
10422 binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
10424 IRTemp ge10 = newTemp(Ity_I32);
10425 assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
10426 put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
10427 put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
10429 IRTemp ge32 = newTemp(Ity_I32);
10430 assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
10431 put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
10432 put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
10434 if (isT)
10435 putIRegT( regD, ire_result, condT );
10436 else
10437 putIRegA( regD, ire_result, condT, Ijk_Boring );
10439 DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10440 return True;
10442 /* fall through */
10445 /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
10446 /* --------------- smsad, smsadx<c><Rd>,<Rn>,<Rm> --------------- */
10448 UInt regD = 99, regN = 99, regM = 99, bitM = 99;
10449 Bool gate = False, isAD = False;
10451 if (isT) {
10452 if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10453 && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
10454 regN = INSNT0(3,0);
10455 regD = INSNT1(11,8);
10456 regM = INSNT1(3,0);
10457 bitM = INSNT1(4,4);
10458 isAD = INSNT0(15,4) == 0xFB2;
10459 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10460 gate = True;
10462 } else {
10463 if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10464 INSNA(15,12) == BITS4(1,1,1,1) &&
10465 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
10466 regD = INSNA(19,16);
10467 regN = INSNA(3,0);
10468 regM = INSNA(11,8);
10469 bitM = INSNA(5,5);
10470 isAD = INSNA(6,6) == 0;
10471 if (regD != 15 && regN != 15 && regM != 15)
10472 gate = True;
10476 if (gate) {
10477 IRTemp irt_regN = newTemp(Ity_I32);
10478 IRTemp irt_regM = newTemp(Ity_I32);
10479 IRTemp irt_prod_lo = newTemp(Ity_I32);
10480 IRTemp irt_prod_hi = newTemp(Ity_I32);
10481 IRTemp tmpM = newTemp(Ity_I32);
10483 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10485 assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10486 assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10488 assign( irt_prod_lo,
10489 binop( Iop_Mul32,
10490 binop( Iop_Sar32,
10491 binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
10492 mkU8(16) ),
10493 binop( Iop_Sar32,
10494 binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10495 mkU8(16) ) ) );
10496 assign( irt_prod_hi, binop(Iop_Mul32,
10497 binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
10498 binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
10499 IRExpr* ire_result
10500 = binop( isAD ? Iop_Add32 : Iop_Sub32,
10501 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
10503 if (isT)
10504 putIRegT( regD, ire_result, condT );
10505 else
10506 putIRegA( regD, ire_result, condT, Ijk_Boring );
10508 if (isAD) {
10509 or_into_QFLAG32(
10510 signed_overflow_after_Add32( ire_result,
10511 irt_prod_lo, irt_prod_hi ),
10512 condT
10516 DIP("smu%cd%s%s r%u, r%u, r%u\n",
10517 isAD ? 'a' : 's',
10518 bitM ? "x" : "", nCC(conq), regD, regN, regM);
10519 return True;
10521 /* fall through */
10524 /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10525 /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10527 UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10528 Bool gate = False, isAD = False;
10530 if (isT) {
10531 if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10532 && INSNT1(7,5) == BITS3(0,0,0)) {
10533 regN = INSNT0(3,0);
10534 regD = INSNT1(11,8);
10535 regM = INSNT1(3,0);
10536 regA = INSNT1(15,12);
10537 bitM = INSNT1(4,4);
10538 isAD = INSNT0(15,4) == 0xFB2;
10539 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10540 && !isBadRegT(regA))
10541 gate = True;
10543 } else {
10544 if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10545 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
10546 regD = INSNA(19,16);
10547 regA = INSNA(15,12);
10548 regN = INSNA(3,0);
10549 regM = INSNA(11,8);
10550 bitM = INSNA(5,5);
10551 isAD = INSNA(6,6) == 0;
10552 if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10553 gate = True;
10557 if (gate) {
10558 IRTemp irt_regN = newTemp(Ity_I32);
10559 IRTemp irt_regM = newTemp(Ity_I32);
10560 IRTemp irt_regA = newTemp(Ity_I32);
10561 IRTemp irt_prod_lo = newTemp(Ity_I32);
10562 IRTemp irt_prod_hi = newTemp(Ity_I32);
10563 IRTemp irt_sum = newTemp(Ity_I32);
10564 IRTemp tmpM = newTemp(Ity_I32);
10566 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10567 assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10569 assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10570 assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10572 assign( irt_prod_lo,
10573 binop(Iop_Mul32,
10574 binop(Iop_Sar32,
10575 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10576 mkU8(16)),
10577 binop(Iop_Sar32,
10578 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10579 mkU8(16))) );
10580 assign( irt_prod_hi,
10581 binop( Iop_Mul32,
10582 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10583 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10584 assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
10585 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
10587 IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
10589 if (isT)
10590 putIRegT( regD, ire_result, condT );
10591 else
10592 putIRegA( regD, ire_result, condT, Ijk_Boring );
10594 if (isAD) {
10595 or_into_QFLAG32(
10596 signed_overflow_after_Add32( mkexpr(irt_sum),
10597 irt_prod_lo, irt_prod_hi ),
10598 condT
10602 or_into_QFLAG32(
10603 signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
10604 condT
10607 DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
10608 isAD ? 'a' : 's',
10609 bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
10610 return True;
10612 /* fall through */
10615 /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10617 UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
10618 Bool gate = False;
10620 if (isT) {
10621 if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
10622 regN = INSNT0(3,0);
10623 regD = INSNT1(11,8);
10624 regM = INSNT1(3,0);
10625 regA = INSNT1(15,12);
10626 bitM = INSNT1(4,4);
10627 bitN = INSNT1(5,5);
10628 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10629 && !isBadRegT(regA))
10630 gate = True;
10632 } else {
10633 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10634 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10635 regD = INSNA(19,16);
10636 regN = INSNA(3,0);
10637 regM = INSNA(11,8);
10638 regA = INSNA(15,12);
10639 bitM = INSNA(6,6);
10640 bitN = INSNA(5,5);
10641 if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10642 gate = True;
10646 if (gate) {
10647 IRTemp irt_regA = newTemp(Ity_I32);
10648 IRTemp irt_prod = newTemp(Ity_I32);
10650 assign( irt_prod,
10651 binop(Iop_Mul32,
10652 binop(Iop_Sar32,
10653 binop(Iop_Shl32,
10654 isT ? getIRegT(regN) : getIRegA(regN),
10655 mkU8(bitN ? 0 : 16)),
10656 mkU8(16)),
10657 binop(Iop_Sar32,
10658 binop(Iop_Shl32,
10659 isT ? getIRegT(regM) : getIRegA(regM),
10660 mkU8(bitM ? 0 : 16)),
10661 mkU8(16))) );
10663 assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10665 IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
10667 if (isT)
10668 putIRegT( regD, ire_result, condT );
10669 else
10670 putIRegA( regD, ire_result, condT, Ijk_Boring );
10672 or_into_QFLAG32(
10673 signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
10674 condT
10677 DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
10678 bitN ? 't' : 'b', bitM ? 't' : 'b',
10679 nCC(conq), regD, regN, regM, regA );
10680 return True;
10682 /* fall through */
10685 /* ----- smlalbb, smlalbt, smlaltb, smlaltt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10687 UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
10688 Bool gate = False;
10690 if (isT) {
10691 if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
10692 regN = INSNT0(3,0);
10693 regDHi = INSNT1(11,8);
10694 regM = INSNT1(3,0);
10695 regDLo = INSNT1(15,12);
10696 bitM = INSNT1(4,4);
10697 bitN = INSNT1(5,5);
10698 if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
10699 && !isBadRegT(regDLo) && regDHi != regDLo)
10700 gate = True;
10702 } else {
10703 if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
10704 (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10705 regDHi = INSNA(19,16);
10706 regN = INSNA(3,0);
10707 regM = INSNA(11,8);
10708 regDLo = INSNA(15,12);
10709 bitM = INSNA(6,6);
10710 bitN = INSNA(5,5);
10711 if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
10712 regDHi != regDLo)
10713 gate = True;
10717 if (gate) {
10718 IRTemp irt_regD = newTemp(Ity_I64);
10719 IRTemp irt_prod = newTemp(Ity_I64);
10720 IRTemp irt_res = newTemp(Ity_I64);
10721 IRTemp irt_resHi = newTemp(Ity_I32);
10722 IRTemp irt_resLo = newTemp(Ity_I32);
10724 assign( irt_prod,
10725 binop(Iop_MullS32,
10726 binop(Iop_Sar32,
10727 binop(Iop_Shl32,
10728 isT ? getIRegT(regN) : getIRegA(regN),
10729 mkU8(bitN ? 0 : 16)),
10730 mkU8(16)),
10731 binop(Iop_Sar32,
10732 binop(Iop_Shl32,
10733 isT ? getIRegT(regM) : getIRegA(regM),
10734 mkU8(bitM ? 0 : 16)),
10735 mkU8(16))) );
10737 assign( irt_regD, binop(Iop_32HLto64,
10738 isT ? getIRegT(regDHi) : getIRegA(regDHi),
10739 isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
10740 assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
10741 assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
10742 assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
10744 if (isT) {
10745 putIRegT( regDHi, mkexpr(irt_resHi), condT );
10746 putIRegT( regDLo, mkexpr(irt_resLo), condT );
10747 } else {
10748 putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
10749 putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
10752 DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
10753 bitN ? 't' : 'b', bitM ? 't' : 'b',
10754 nCC(conq), regDHi, regN, regM, regDLo );
10755 return True;
10757 /* fall through */
10760 /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10762 UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10763 Bool gate = False;
10765 if (isT) {
10766 if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
10767 regN = INSNT0(3,0);
10768 regD = INSNT1(11,8);
10769 regM = INSNT1(3,0);
10770 regA = INSNT1(15,12);
10771 bitM = INSNT1(4,4);
10772 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10773 && !isBadRegT(regA))
10774 gate = True;
10776 } else {
10777 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
10778 (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
10779 regD = INSNA(19,16);
10780 regN = INSNA(3,0);
10781 regM = INSNA(11,8);
10782 regA = INSNA(15,12);
10783 bitM = INSNA(6,6);
10784 if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10785 gate = True;
10789 if (gate) {
10790 IRTemp irt_regA = newTemp(Ity_I32);
10791 IRTemp irt_prod = newTemp(Ity_I64);
10793 assign( irt_prod,
10794 binop(Iop_MullS32,
10795 isT ? getIRegT(regN) : getIRegA(regN),
10796 binop(Iop_Sar32,
10797 binop(Iop_Shl32,
10798 isT ? getIRegT(regM) : getIRegA(regM),
10799 mkU8(bitM ? 0 : 16)),
10800 mkU8(16))) );
10802 assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10804 IRTemp prod32 = newTemp(Ity_I32);
10805 assign(prod32,
10806 binop(Iop_Or32,
10807 binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
10808 binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
10811 IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
10813 if (isT)
10814 putIRegT( regD, ire_result, condT );
10815 else
10816 putIRegA( regD, ire_result, condT, Ijk_Boring );
10818 or_into_QFLAG32(
10819 signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
10820 condT
10823 DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
10824 bitM ? 't' : 'b',
10825 nCC(conq), regD, regN, regM, regA );
10826 return True;
10828 /* fall through */
10831 /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
10832 /* fixme: fix up the test in v6media.c so that we can pass the ge
10833 flags as part of the test. */
10835 UInt regD = 99, regN = 99, regM = 99;
10836 Bool gate = False;
10838 if (isT) {
10839 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10840 regN = INSNT0(3,0);
10841 regD = INSNT1(11,8);
10842 regM = INSNT1(3,0);
10843 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10844 gate = True;
10846 } else {
10847 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
10848 INSNA(11,8) == BITS4(1,1,1,1) &&
10849 INSNA(7,4) == BITS4(1,0,1,1)) {
10850 regD = INSNA(15,12);
10851 regN = INSNA(19,16);
10852 regM = INSNA(3,0);
10853 if (regD != 15 && regN != 15 && regM != 15)
10854 gate = True;
10858 if (gate) {
10859 IRTemp irt_ge_flag0 = newTemp(Ity_I32);
10860 IRTemp irt_ge_flag1 = newTemp(Ity_I32);
10861 IRTemp irt_ge_flag2 = newTemp(Ity_I32);
10862 IRTemp irt_ge_flag3 = newTemp(Ity_I32);
10864 assign( irt_ge_flag0, get_GEFLAG32(0) );
10865 assign( irt_ge_flag1, get_GEFLAG32(1) );
10866 assign( irt_ge_flag2, get_GEFLAG32(2) );
10867 assign( irt_ge_flag3, get_GEFLAG32(3) );
10869 IRExpr* ire_ge_flag0_or
10870 = binop(Iop_Or32, mkexpr(irt_ge_flag0),
10871 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
10872 IRExpr* ire_ge_flag1_or
10873 = binop(Iop_Or32, mkexpr(irt_ge_flag1),
10874 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
10875 IRExpr* ire_ge_flag2_or
10876 = binop(Iop_Or32, mkexpr(irt_ge_flag2),
10877 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
10878 IRExpr* ire_ge_flag3_or
10879 = binop(Iop_Or32, mkexpr(irt_ge_flag3),
10880 binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
10882 IRExpr* ire_ge_flags
10883 = binop( Iop_Or32,
10884 binop(Iop_Or32,
10885 binop(Iop_And32,
10886 binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
10887 mkU32(0x000000ff)),
10888 binop(Iop_And32,
10889 binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
10890 mkU32(0x0000ff00))),
10891 binop(Iop_Or32,
10892 binop(Iop_And32,
10893 binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
10894 mkU32(0x00ff0000)),
10895 binop(Iop_And32,
10896 binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
10897 mkU32(0xff000000))) );
10899 IRExpr* ire_result
10900 = binop(Iop_Or32,
10901 binop(Iop_And32,
10902 isT ? getIRegT(regN) : getIRegA(regN),
10903 ire_ge_flags ),
10904 binop(Iop_And32,
10905 isT ? getIRegT(regM) : getIRegA(regM),
10906 unop(Iop_Not32, ire_ge_flags)));
10908 if (isT)
10909 putIRegT( regD, ire_result, condT );
10910 else
10911 putIRegA( regD, ire_result, condT, Ijk_Boring );
10913 DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10914 return True;
10916 /* fall through */
10919 /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
10921 UInt regD = 99, regN = 99, regM = 99, rotate = 99;
10922 Bool gate = False;
10924 if (isT) {
10925 if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
10926 regN = INSNT0(3,0);
10927 regD = INSNT1(11,8);
10928 regM = INSNT1(3,0);
10929 rotate = INSNT1(5,4);
10930 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10931 gate = True;
10933 } else {
10934 if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
10935 INSNA(9,4) == BITS6(0,0,0,1,1,1) ) {
10936 regD = INSNA(15,12);
10937 regN = INSNA(19,16);
10938 regM = INSNA(3,0);
10939 rotate = INSNA(11,10);
10940 if (regD != 15 && regN != 15 && regM != 15)
10941 gate = True;
10945 if (gate) {
10946 IRTemp irt_regN = newTemp(Ity_I32);
10947 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10949 IRTemp irt_regM = newTemp(Ity_I32);
10950 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10952 IRTemp irt_rot = newTemp(Ity_I32);
10953 assign( irt_rot, binop(Iop_And32,
10954 genROR32(irt_regM, 8 * rotate),
10955 mkU32(0x00FF00FF)) );
10957 IRExpr* resLo
10958 = binop(Iop_And32,
10959 binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
10960 mkU32(0x0000FFFF));
10962 IRExpr* resHi
10963 = binop(Iop_Add32,
10964 binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
10965 binop(Iop_And32, mkexpr(irt_rot), mkU32(0xFFFF0000)));
10967 IRExpr* ire_result
10968 = binop( Iop_Or32, resHi, resLo );
10970 if (isT)
10971 putIRegT( regD, ire_result, condT );
10972 else
10973 putIRegA( regD, ire_result, condT, Ijk_Boring );
10975 DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
10976 nCC(conq), regD, regN, regM, 8 * rotate );
10977 return True;
10979 /* fall through */
10982 /* --------------- usad8 Rd,Rn,Rm ---------------- */
10983 /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
10985 UInt rD = 99, rN = 99, rM = 99, rA = 99;
10986 Bool gate = False;
10988 if (isT) {
10989 if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
10990 rN = INSNT0(3,0);
10991 rA = INSNT1(15,12);
10992 rD = INSNT1(11,8);
10993 rM = INSNT1(3,0);
10994 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
10995 gate = True;
10997 } else {
10998 if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
10999 INSNA(7,4) == BITS4(0,0,0,1) ) {
11000 rD = INSNA(19,16);
11001 rA = INSNA(15,12);
11002 rM = INSNA(11,8);
11003 rN = INSNA(3,0);
11004 if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
11005 gate = True;
11008 /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
11010 if (gate) {
11011 IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
11012 IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
11013 IRExpr* rAe = rA == 15 ? mkU32(0)
11014 : (isT ? getIRegT(rA) : getIRegA(rA));
11015 IRExpr* res = binop(Iop_Add32,
11016 binop(Iop_Sad8Ux4, rNe, rMe),
11017 rAe);
11018 if (isT)
11019 putIRegT( rD, res, condT );
11020 else
11021 putIRegA( rD, res, condT, Ijk_Boring );
11023 if (rA == 15) {
11024 DIP( "usad8%s r%u, r%u, r%u\n",
11025 nCC(conq), rD, rN, rM );
11026 } else {
11027 DIP( "usada8%s r%u, r%u, r%u, r%u\n",
11028 nCC(conq), rD, rN, rM, rA );
11030 return True;
11032 /* fall through */
11035 /* ------------------ qadd<c> <Rd>,<Rn>,<Rm> ------------------- */
11037 UInt regD = 99, regN = 99, regM = 99;
11038 Bool gate = False;
11040 if (isT) {
11041 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
11042 regN = INSNT0(3,0);
11043 regD = INSNT1(11,8);
11044 regM = INSNT1(3,0);
11045 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11046 gate = True;
11048 } else {
11049 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
11050 INSNA(11,8) == BITS4(0,0,0,0) &&
11051 INSNA(7,4) == BITS4(0,1,0,1)) {
11052 regD = INSNA(15,12);
11053 regN = INSNA(19,16);
11054 regM = INSNA(3,0);
11055 if (regD != 15 && regN != 15 && regM != 15)
11056 gate = True;
11060 if (gate) {
11061 IRTemp rNt = newTemp(Ity_I32);
11062 IRTemp rMt = newTemp(Ity_I32);
11063 IRTemp res_q = newTemp(Ity_I32);
11065 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11066 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11068 assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
11069 if (isT)
11070 putIRegT( regD, mkexpr(res_q), condT );
11071 else
11072 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11074 or_into_QFLAG32(
11075 signed_overflow_after_Add32(
11076 binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11077 condT
11080 DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11081 return True;
11083 /* fall through */
11086 /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
11088 UInt regD = 99, regN = 99, regM = 99;
11089 Bool gate = False;
11091 if (isT) {
11092 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
11093 regN = INSNT0(3,0);
11094 regD = INSNT1(11,8);
11095 regM = INSNT1(3,0);
11096 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11097 gate = True;
11099 } else {
11100 if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
11101 INSNA(11,8) == BITS4(0,0,0,0) &&
11102 INSNA(7,4) == BITS4(0,1,0,1)) {
11103 regD = INSNA(15,12);
11104 regN = INSNA(19,16);
11105 regM = INSNA(3,0);
11106 if (regD != 15 && regN != 15 && regM != 15)
11107 gate = True;
11111 if (gate) {
11112 IRTemp rNt = newTemp(Ity_I32);
11113 IRTemp rMt = newTemp(Ity_I32);
11114 IRTemp rN_d = newTemp(Ity_I32);
11115 IRTemp res_q = newTemp(Ity_I32);
11117 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11118 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11120 or_into_QFLAG32(
11121 signed_overflow_after_Add32(
11122 binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11123 condT
11126 assign(rN_d, binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11127 assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
11128 if (isT)
11129 putIRegT( regD, mkexpr(res_q), condT );
11130 else
11131 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11133 or_into_QFLAG32(
11134 signed_overflow_after_Add32(
11135 binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11136 condT
11139 DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11140 return True;
11142 /* fall through */
11145 /* ------------------ qsub<c> <Rd>,<Rn>,<Rm> ------------------- */
11147 UInt regD = 99, regN = 99, regM = 99;
11148 Bool gate = False;
11150 if (isT) {
11151 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
11152 regN = INSNT0(3,0);
11153 regD = INSNT1(11,8);
11154 regM = INSNT1(3,0);
11155 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11156 gate = True;
11158 } else {
11159 if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
11160 INSNA(11,8) == BITS4(0,0,0,0) &&
11161 INSNA(7,4) == BITS4(0,1,0,1)) {
11162 regD = INSNA(15,12);
11163 regN = INSNA(19,16);
11164 regM = INSNA(3,0);
11165 if (regD != 15 && regN != 15 && regM != 15)
11166 gate = True;
11170 if (gate) {
11171 IRTemp rNt = newTemp(Ity_I32);
11172 IRTemp rMt = newTemp(Ity_I32);
11173 IRTemp res_q = newTemp(Ity_I32);
11175 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11176 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11178 assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
11179 if (isT)
11180 putIRegT( regD, mkexpr(res_q), condT );
11181 else
11182 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11184 or_into_QFLAG32(
11185 signed_overflow_after_Sub32(
11186 binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11187 condT
11190 DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11191 return True;
11193 /* fall through */
11196 /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
11198 UInt regD = 99, regN = 99, regM = 99;
11199 Bool gate = False;
11201 if (isT) {
11202 if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
11203 regN = INSNT0(3,0);
11204 regD = INSNT1(11,8);
11205 regM = INSNT1(3,0);
11206 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11207 gate = True;
11209 } else {
11210 if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
11211 INSNA(11,8) == BITS4(0,0,0,0) &&
11212 INSNA(7,4) == BITS4(0,1,0,1)) {
11213 regD = INSNA(15,12);
11214 regN = INSNA(19,16);
11215 regM = INSNA(3,0);
11216 if (regD != 15 && regN != 15 && regM != 15)
11217 gate = True;
11221 if (gate) {
11222 IRTemp rNt = newTemp(Ity_I32);
11223 IRTemp rMt = newTemp(Ity_I32);
11224 IRTemp rN_d = newTemp(Ity_I32);
11225 IRTemp res_q = newTemp(Ity_I32);
11227 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11228 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11230 or_into_QFLAG32(
11231 signed_overflow_after_Add32(
11232 binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11233 condT
11236 assign(rN_d, binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11237 assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
11238 if (isT)
11239 putIRegT( regD, mkexpr(res_q), condT );
11240 else
11241 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11243 or_into_QFLAG32(
11244 signed_overflow_after_Sub32(
11245 binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11246 condT
11249 DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11250 return True;
11252 /* fall through */
11255 /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
11257 UInt regD = 99, regN = 99, regM = 99;
11258 Bool gate = False;
11260 if (isT) {
11261 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11262 regN = INSNT0(3,0);
11263 regD = INSNT1(11,8);
11264 regM = INSNT1(3,0);
11265 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11266 gate = True;
11268 } else {
11269 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11270 INSNA(11,8) == BITS4(1,1,1,1) &&
11271 INSNA(7,4) == BITS4(0,1,1,1)) {
11272 regD = INSNA(15,12);
11273 regN = INSNA(19,16);
11274 regM = INSNA(3,0);
11275 if (regD != 15 && regN != 15 && regM != 15)
11276 gate = True;
11280 if (gate) {
11281 IRTemp rNt = newTemp(Ity_I32);
11282 IRTemp rMt = newTemp(Ity_I32);
11283 IRTemp res_q = newTemp(Ity_I32);
11285 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11286 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11288 assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11289 if (isT)
11290 putIRegT( regD, mkexpr(res_q), condT );
11291 else
11292 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11294 DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11295 return True;
11297 /* fall through */
11300 /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
11302 UInt regD = 99, regN = 99, regM = 99;
11303 Bool gate = False;
11305 if (isT) {
11306 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11307 regN = INSNT0(3,0);
11308 regD = INSNT1(11,8);
11309 regM = INSNT1(3,0);
11310 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11311 gate = True;
11313 } else {
11314 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11315 INSNA(11,8) == BITS4(1,1,1,1) &&
11316 INSNA(7,4) == BITS4(0,0,0,1)) {
11317 regD = INSNA(15,12);
11318 regN = INSNA(19,16);
11319 regM = INSNA(3,0);
11320 if (regD != 15 && regN != 15 && regM != 15)
11321 gate = True;
11325 if (gate) {
11326 IRTemp rNt = newTemp(Ity_I32);
11327 IRTemp rMt = newTemp(Ity_I32);
11328 IRTemp res_q = newTemp(Ity_I32);
11330 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11331 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11333 assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
11334 if (isT)
11335 putIRegT( regD, mkexpr(res_q), condT );
11336 else
11337 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11339 DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11340 return True;
11342 /* fall through */
11345 /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11347 UInt regD = 99, regN = 99, regM = 99;
11348 Bool gate = False;
11350 if (isT) {
11351 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11352 regN = INSNT0(3,0);
11353 regD = INSNT1(11,8);
11354 regM = INSNT1(3,0);
11355 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11356 gate = True;
11358 } else {
11359 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11360 INSNA(11,8) == BITS4(1,1,1,1) &&
11361 INSNA(7,4) == BITS4(1,1,1,1)) {
11362 regD = INSNA(15,12);
11363 regN = INSNA(19,16);
11364 regM = INSNA(3,0);
11365 if (regD != 15 && regN != 15 && regM != 15)
11366 gate = True;
11370 if (gate) {
11371 IRTemp rNt = newTemp(Ity_I32);
11372 IRTemp rMt = newTemp(Ity_I32);
11373 IRTemp res_q = newTemp(Ity_I32);
11375 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11376 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11378 assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
11379 if (isT)
11380 putIRegT( regD, mkexpr(res_q), condT );
11381 else
11382 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11384 DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11385 return True;
11387 /* fall through */
11390 /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
11392 UInt regD = 99, regN = 99, regM = 99;
11393 Bool gate = False;
11395 if (isT) {
11396 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11397 regN = INSNT0(3,0);
11398 regD = INSNT1(11,8);
11399 regM = INSNT1(3,0);
11400 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11401 gate = True;
11403 } else {
11404 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11405 INSNA(11,8) == BITS4(1,1,1,1) &&
11406 INSNA(7,4) == BITS4(0,1,1,1)) {
11407 regD = INSNA(15,12);
11408 regN = INSNA(19,16);
11409 regM = INSNA(3,0);
11410 if (regD != 15 && regN != 15 && regM != 15)
11411 gate = True;
11415 if (gate) {
11416 IRTemp rNt = newTemp(Ity_I32);
11417 IRTemp rMt = newTemp(Ity_I32);
11418 IRTemp res_q = newTemp(Ity_I32);
11420 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11421 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11423 assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11424 if (isT)
11425 putIRegT( regD, mkexpr(res_q), condT );
11426 else
11427 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11429 DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11430 return True;
11432 /* fall through */
11435 /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
11437 UInt regD = 99, regN = 99, regM = 99;
11438 Bool gate = False;
11440 if (isT) {
11441 if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11442 regN = INSNT0(3,0);
11443 regD = INSNT1(11,8);
11444 regM = INSNT1(3,0);
11445 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11446 gate = True;
11448 } else {
11449 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11450 INSNA(11,8) == BITS4(1,1,1,1) &&
11451 INSNA(7,4) == BITS4(0,0,0,1)) {
11452 regD = INSNA(15,12);
11453 regN = INSNA(19,16);
11454 regM = INSNA(3,0);
11455 if (regD != 15 && regN != 15 && regM != 15)
11456 gate = True;
11460 if (gate) {
11461 IRTemp rNt = newTemp(Ity_I32);
11462 IRTemp rMt = newTemp(Ity_I32);
11463 IRTemp res_q = newTemp(Ity_I32);
11465 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11466 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11468 assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
11469 if (isT)
11470 putIRegT( regD, mkexpr(res_q), condT );
11471 else
11472 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11474 DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11475 return True;
11477 /* fall through */
11480 /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
11482 UInt regD = 99, regN = 99, regM = 99;
11483 Bool gate = False;
11485 if (isT) {
11486 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11487 regN = INSNT0(3,0);
11488 regD = INSNT1(11,8);
11489 regM = INSNT1(3,0);
11490 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11491 gate = True;
11493 } else {
11494 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11495 INSNA(11,8) == BITS4(1,1,1,1) &&
11496 INSNA(7,4) == BITS4(0,1,0,1)) {
11497 regD = INSNA(15,12);
11498 regN = INSNA(19,16);
11499 regM = INSNA(3,0);
11500 if (regD != 15 && regN != 15 && regM != 15)
11501 gate = True;
11505 if (gate) {
11506 IRTemp irt_regN = newTemp(Ity_I32);
11507 IRTemp irt_regM = newTemp(Ity_I32);
11508 IRTemp irt_sum = newTemp(Ity_I32);
11509 IRTemp irt_diff = newTemp(Ity_I32);
11510 IRTemp irt_sum_res = newTemp(Ity_I32);
11511 IRTemp irt_diff_res = newTemp(Ity_I32);
11513 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11514 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11516 assign( irt_diff,
11517 binop( Iop_Sub32,
11518 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11519 binop( Iop_Shr32,
11520 binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
11521 mkU8(16) ) ) );
11522 armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
11524 assign( irt_sum,
11525 binop( Iop_Add32,
11526 binop( Iop_Shr32,
11527 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11528 mkU8(16) ),
11529 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
11530 armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
11532 IRExpr* ire_result = binop( Iop_Or32,
11533 binop( Iop_Shl32, mkexpr(irt_diff_res),
11534 mkU8(16) ),
11535 binop( Iop_And32, mkexpr(irt_sum_res),
11536 mkU32(0xFFFF)) );
11538 if (isT)
11539 putIRegT( regD, ire_result, condT );
11540 else
11541 putIRegA( regD, ire_result, condT, Ijk_Boring );
11543 DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11544 return True;
11546 /* fall through */
11549 /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11551 UInt regD = 99, regN = 99, regM = 99;
11552 Bool gate = False;
11554 if (isT) {
11555 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11556 regN = INSNT0(3,0);
11557 regD = INSNT1(11,8);
11558 regM = INSNT1(3,0);
11559 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11560 gate = True;
11562 } else {
11563 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11564 INSNA(11,8) == BITS4(1,1,1,1) &&
11565 INSNA(7,4) == BITS4(0,0,1,1)) {
11566 regD = INSNA(15,12);
11567 regN = INSNA(19,16);
11568 regM = INSNA(3,0);
11569 if (regD != 15 && regN != 15 && regM != 15)
11570 gate = True;
11574 if (gate) {
11575 IRTemp irt_regN = newTemp(Ity_I32);
11576 IRTemp irt_regM = newTemp(Ity_I32);
11577 IRTemp irt_sum = newTemp(Ity_I32);
11578 IRTemp irt_diff = newTemp(Ity_I32);
11579 IRTemp irt_res_sum = newTemp(Ity_I32);
11580 IRTemp irt_res_diff = newTemp(Ity_I32);
11582 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11583 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11585 assign( irt_diff,
11586 binop( Iop_Sub32,
11587 binop( Iop_Shr32,
11588 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11589 mkU8(16) ),
11590 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11591 armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
11593 assign( irt_sum,
11594 binop( Iop_Add32,
11595 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11596 binop( Iop_Shr32,
11597 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11598 mkU8(16) ) ) );
11599 armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
11601 IRExpr* ire_result
11602 = binop( Iop_Or32,
11603 binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
11604 binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
11606 if (isT)
11607 putIRegT( regD, ire_result, condT );
11608 else
11609 putIRegA( regD, ire_result, condT, Ijk_Boring );
11611 DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11612 return True;
11614 /* fall through */
11617 /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
11619 UInt regD = 99, regN = 99, regM = 99;
11620 Bool gate = False;
11622 if (isT) {
11623 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11624 regN = INSNT0(3,0);
11625 regD = INSNT1(11,8);
11626 regM = INSNT1(3,0);
11627 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11628 gate = True;
11630 } else {
11631 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11632 INSNA(11,8) == BITS4(1,1,1,1) &&
11633 INSNA(7,4) == BITS4(0,1,0,1)) {
11634 regD = INSNA(15,12);
11635 regN = INSNA(19,16);
11636 regM = INSNA(3,0);
11637 if (regD != 15 && regN != 15 && regM != 15)
11638 gate = True;
11642 if (gate) {
11643 IRTemp irt_regN = newTemp(Ity_I32);
11644 IRTemp irt_regM = newTemp(Ity_I32);
11645 IRTemp irt_sum = newTemp(Ity_I32);
11646 IRTemp irt_diff = newTemp(Ity_I32);
11648 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11649 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11651 assign( irt_sum,
11652 binop( Iop_Add32,
11653 unop( Iop_16Uto32,
11654 unop( Iop_32to16, mkexpr(irt_regN) )
11656 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11658 assign( irt_diff,
11659 binop( Iop_Sub32,
11660 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11661 unop( Iop_16Uto32,
11662 unop( Iop_32to16, mkexpr(irt_regM) )
11667 IRExpr* ire_result
11668 = binop( Iop_Or32,
11669 binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11670 binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11672 IRTemp ge10 = newTemp(Ity_I32);
11673 assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
11674 mkU32(0x10000), mkexpr(irt_sum) ),
11675 mkU32(1), mkU32(0) ) );
11676 put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
11677 put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
11679 IRTemp ge32 = newTemp(Ity_I32);
11680 assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11681 put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11682 put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11684 if (isT)
11685 putIRegT( regD, ire_result, condT );
11686 else
11687 putIRegA( regD, ire_result, condT, Ijk_Boring );
11689 DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11690 return True;
11692 /* fall through */
11695 /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11697 UInt regD = 99, regN = 99, regM = 99;
11698 Bool gate = False;
11700 if (isT) {
11701 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11702 regN = INSNT0(3,0);
11703 regD = INSNT1(11,8);
11704 regM = INSNT1(3,0);
11705 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11706 gate = True;
11708 } else {
11709 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11710 INSNA(11,8) == BITS4(1,1,1,1) &&
11711 INSNA(7,4) == BITS4(0,0,1,1)) {
11712 regD = INSNA(15,12);
11713 regN = INSNA(19,16);
11714 regM = INSNA(3,0);
11715 if (regD != 15 && regN != 15 && regM != 15)
11716 gate = True;
11720 if (gate) {
11721 IRTemp irt_regN = newTemp(Ity_I32);
11722 IRTemp irt_regM = newTemp(Ity_I32);
11723 IRTemp irt_sum = newTemp(Ity_I32);
11724 IRTemp irt_diff = newTemp(Ity_I32);
11726 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11727 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11729 assign( irt_diff,
11730 binop( Iop_Sub32,
11731 unop( Iop_16Uto32,
11732 unop( Iop_32to16, mkexpr(irt_regN) )
11734 binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11736 assign( irt_sum,
11737 binop( Iop_Add32,
11738 binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11739 unop( Iop_16Uto32,
11740 unop( Iop_32to16, mkexpr(irt_regM) )
11741 ) ) );
11743 IRExpr* ire_result
11744 = binop( Iop_Or32,
11745 binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
11746 binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
11748 IRTemp ge10 = newTemp(Ity_I32);
11749 assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
11750 put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11751 put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11753 IRTemp ge32 = newTemp(Ity_I32);
11754 assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
11755 mkU32(0x10000), mkexpr(irt_sum) ),
11756 mkU32(1), mkU32(0) ) );
11757 put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
11758 put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
11760 if (isT)
11761 putIRegT( regD, ire_result, condT );
11762 else
11763 putIRegA( regD, ire_result, condT, Ijk_Boring );
11765 DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11766 return True;
11768 /* fall through */
11771 /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
11773 UInt regD = 99, regN = 99, regM = 99;
11774 Bool gate = False;
11776 if (isT) {
11777 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
11778 regN = INSNT0(3,0);
11779 regD = INSNT1(11,8);
11780 regM = INSNT1(3,0);
11781 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11782 gate = True;
11784 } else {
11785 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
11786 INSNA(11,8) == BITS4(1,1,1,1) &&
11787 INSNA(7,4) == BITS4(0,1,0,1)) {
11788 regD = INSNA(15,12);
11789 regN = INSNA(19,16);
11790 regM = INSNA(3,0);
11791 if (regD != 15 && regN != 15 && regM != 15)
11792 gate = True;
11796 if (gate) {
11797 IRTemp irt_regN = newTemp(Ity_I32);
11798 IRTemp irt_regM = newTemp(Ity_I32);
11799 IRTemp irt_sum = newTemp(Ity_I32);
11800 IRTemp irt_diff = newTemp(Ity_I32);
11802 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11803 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11805 assign( irt_sum,
11806 binop( Iop_Add32,
11807 binop( Iop_Sar32,
11808 binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11809 mkU8(16) ),
11810 binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
11812 assign( irt_diff,
11813 binop( Iop_Sub32,
11814 binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
11815 binop( Iop_Sar32,
11816 binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11817 mkU8(16) ) ) );
11819 IRExpr* ire_result
11820 = binop( Iop_Or32,
11821 binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11822 binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11824 IRTemp ge10 = newTemp(Ity_I32);
11825 assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
11826 put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11827 put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11829 IRTemp ge32 = newTemp(Ity_I32);
11830 assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11831 put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11832 put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11834 if (isT)
11835 putIRegT( regD, ire_result, condT );
11836 else
11837 putIRegA( regD, ire_result, condT, Ijk_Boring );
11839 DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11840 return True;
11842 /* fall through */
11845 /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11847 UInt regD = 99, regN = 99, regM = 99;
11848 Bool gate = False;
11850 if (isT) {
11851 if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11852 regN = INSNT0(3,0);
11853 regD = INSNT1(11,8);
11854 regM = INSNT1(3,0);
11855 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11856 gate = True;
11858 } else {
11859 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11860 INSNA(11,8) == BITS4(1,1,1,1) &&
11861 INSNA(7,4) == BITS4(1,1,1,1)) {
11862 regD = INSNA(15,12);
11863 regN = INSNA(19,16);
11864 regM = INSNA(3,0);
11865 if (regD != 15 && regN != 15 && regM != 15)
11866 gate = True;
11870 if (gate) {
11871 IRTemp rNt = newTemp(Ity_I32);
11872 IRTemp rMt = newTemp(Ity_I32);
11873 IRTemp res_q = newTemp(Ity_I32);
11875 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11876 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11878 assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
11879 if (isT)
11880 putIRegT( regD, mkexpr(res_q), condT );
11881 else
11882 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11884 DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11885 return True;
11887 /* fall through */
11890 /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
11892 UInt regD = 99, regN = 99, regM = 99, rotate = 99;
11893 Bool gate = False;
11895 if (isT) {
11896 if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
11897 regN = INSNT0(3,0);
11898 regD = INSNT1(11,8);
11899 regM = INSNT1(3,0);
11900 rotate = INSNT1(5,4);
11901 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11902 gate = True;
11904 } else {
11905 if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
11906 INSNA(9,4) == BITS6(0,0,0,1,1,1) ) {
11907 regD = INSNA(15,12);
11908 regN = INSNA(19,16);
11909 regM = INSNA(3,0);
11910 rotate = INSNA(11,10);
11911 if (regD != 15 && regN != 15 && regM != 15)
11912 gate = True;
11916 if (gate) {
11917 IRTemp irt_regN = newTemp(Ity_I32);
11918 assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11920 IRTemp irt_regM = newTemp(Ity_I32);
11921 assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11923 IRTemp irt_rot = newTemp(Ity_I32);
11924 assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
11926 /* FIXME Maybe we can write this arithmetic in shorter form. */
11927 IRExpr* resLo
11928 = binop(Iop_And32,
11929 binop(Iop_Add32,
11930 mkexpr(irt_regN),
11931 unop(Iop_16Uto32,
11932 unop(Iop_8Sto16,
11933 unop(Iop_32to8, mkexpr(irt_rot))))),
11934 mkU32(0x0000FFFF));
11936 IRExpr* resHi
11937 = binop(Iop_And32,
11938 binop(Iop_Add32,
11939 mkexpr(irt_regN),
11940 binop(Iop_Shl32,
11941 unop(Iop_16Uto32,
11942 unop(Iop_8Sto16,
11943 unop(Iop_32to8,
11944 binop(Iop_Shr32,
11945 mkexpr(irt_rot),
11946 mkU8(16))))),
11947 mkU8(16))),
11948 mkU32(0xFFFF0000));
11950 IRExpr* ire_result
11951 = binop( Iop_Or32, resHi, resLo );
11953 if (isT)
11954 putIRegT( regD, ire_result, condT );
11955 else
11956 putIRegA( regD, ire_result, condT, Ijk_Boring );
11958 DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
11959 nCC(conq), regD, regN, regM, 8 * rotate );
11960 return True;
11962 /* fall through */
11965 /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11967 UInt regD = 99, regN = 99, regM = 99;
11968 Bool gate = False;
11970 if (isT) {
11971 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11972 regN = INSNT0(3,0);
11973 regD = INSNT1(11,8);
11974 regM = INSNT1(3,0);
11975 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11976 gate = True;
11978 } else {
11979 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11980 INSNA(11,8) == BITS4(1,1,1,1) &&
11981 INSNA(7,4) == BITS4(0,0,1,1)) {
11982 regD = INSNA(15,12);
11983 regN = INSNA(19,16);
11984 regM = INSNA(3,0);
11985 if (regD != 15 && regN != 15 && regM != 15)
11986 gate = True;
11990 if (gate) {
11991 IRTemp rNt = newTemp(Ity_I32);
11992 IRTemp rMt = newTemp(Ity_I32);
11993 IRTemp irt_diff = newTemp(Ity_I32);
11994 IRTemp irt_sum = newTemp(Ity_I32);
11995 IRTemp res_q = newTemp(Ity_I32);
11997 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11998 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12000 assign( irt_diff,
12001 binop(Iop_Sub32,
12002 unop(Iop_16Sto32,
12003 unop(Iop_32to16,
12004 mkexpr(rNt)
12007 unop(Iop_16Sto32,
12008 unop(Iop_32to16,
12009 binop(Iop_Shr32,
12010 mkexpr(rMt), mkU8(16)
12017 assign( irt_sum,
12018 binop(Iop_Add32,
12019 unop(Iop_16Sto32,
12020 unop(Iop_32to16,
12021 binop(Iop_Shr32,
12022 mkexpr(rNt), mkU8(16)
12026 unop(Iop_16Sto32,
12027 unop(Iop_32to16, mkexpr(rMt)
12033 assign( res_q,
12034 binop(Iop_Or32,
12035 unop(Iop_16Uto32,
12036 unop(Iop_32to16,
12037 binop(Iop_Shr32,
12038 mkexpr(irt_diff), mkU8(1)
12042 binop(Iop_Shl32,
12043 binop(Iop_Shr32,
12044 mkexpr(irt_sum), mkU8(1)
12046 mkU8(16)
12051 if (isT)
12052 putIRegT( regD, mkexpr(res_q), condT );
12053 else
12054 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12056 DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12057 return True;
12059 /* fall through */
12062 /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
12064 UInt regD = 99, regN = 99, regM = 99;
12065 Bool gate = False;
12067 if (isT) {
12068 if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12069 regN = INSNT0(3,0);
12070 regD = INSNT1(11,8);
12071 regM = INSNT1(3,0);
12072 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12073 gate = True;
12075 } else {
12076 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12077 INSNA(11,8) == BITS4(1,1,1,1) &&
12078 INSNA(7,4) == BITS4(0,0,1,1)) {
12079 regD = INSNA(15,12);
12080 regN = INSNA(19,16);
12081 regM = INSNA(3,0);
12082 if (regD != 15 && regN != 15 && regM != 15)
12083 gate = True;
12087 if (gate) {
12088 IRTemp rNt = newTemp(Ity_I32);
12089 IRTemp rMt = newTemp(Ity_I32);
12090 IRTemp irt_diff = newTemp(Ity_I32);
12091 IRTemp irt_sum = newTemp(Ity_I32);
12092 IRTemp res_q = newTemp(Ity_I32);
12094 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12095 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12097 assign( irt_diff,
12098 binop(Iop_Sub32,
12099 unop(Iop_16Uto32,
12100 unop(Iop_32to16,
12101 mkexpr(rNt)
12104 unop(Iop_16Uto32,
12105 unop(Iop_32to16,
12106 binop(Iop_Shr32,
12107 mkexpr(rMt), mkU8(16)
12114 assign( irt_sum,
12115 binop(Iop_Add32,
12116 unop(Iop_16Uto32,
12117 unop(Iop_32to16,
12118 binop(Iop_Shr32,
12119 mkexpr(rNt), mkU8(16)
12123 unop(Iop_16Uto32,
12124 unop(Iop_32to16, mkexpr(rMt)
12130 assign( res_q,
12131 binop(Iop_Or32,
12132 unop(Iop_16Uto32,
12133 unop(Iop_32to16,
12134 binop(Iop_Shr32,
12135 mkexpr(irt_diff), mkU8(1)
12139 binop(Iop_Shl32,
12140 binop(Iop_Shr32,
12141 mkexpr(irt_sum), mkU8(1)
12143 mkU8(16)
12148 if (isT)
12149 putIRegT( regD, mkexpr(res_q), condT );
12150 else
12151 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12153 DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12154 return True;
12156 /* fall through */
12159 /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12161 UInt regD = 99, regN = 99, regM = 99;
12162 Bool gate = False;
12164 if (isT) {
12165 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12166 regN = INSNT0(3,0);
12167 regD = INSNT1(11,8);
12168 regM = INSNT1(3,0);
12169 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12170 gate = True;
12172 } else {
12173 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12174 INSNA(11,8) == BITS4(1,1,1,1) &&
12175 INSNA(7,4) == BITS4(0,1,0,1)) {
12176 regD = INSNA(15,12);
12177 regN = INSNA(19,16);
12178 regM = INSNA(3,0);
12179 if (regD != 15 && regN != 15 && regM != 15)
12180 gate = True;
12184 if (gate) {
12185 IRTemp rNt = newTemp(Ity_I32);
12186 IRTemp rMt = newTemp(Ity_I32);
12187 IRTemp irt_diff = newTemp(Ity_I32);
12188 IRTemp irt_sum = newTemp(Ity_I32);
12189 IRTemp res_q = newTemp(Ity_I32);
12191 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12192 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12194 assign( irt_sum,
12195 binop(Iop_Add32,
12196 unop(Iop_16Sto32,
12197 unop(Iop_32to16,
12198 mkexpr(rNt)
12201 unop(Iop_16Sto32,
12202 unop(Iop_32to16,
12203 binop(Iop_Shr32,
12204 mkexpr(rMt), mkU8(16)
12211 assign( irt_diff,
12212 binop(Iop_Sub32,
12213 unop(Iop_16Sto32,
12214 unop(Iop_32to16,
12215 binop(Iop_Shr32,
12216 mkexpr(rNt), mkU8(16)
12220 unop(Iop_16Sto32,
12221 unop(Iop_32to16, mkexpr(rMt)
12227 assign( res_q,
12228 binop(Iop_Or32,
12229 unop(Iop_16Uto32,
12230 unop(Iop_32to16,
12231 binop(Iop_Shr32,
12232 mkexpr(irt_sum), mkU8(1)
12236 binop(Iop_Shl32,
12237 binop(Iop_Shr32,
12238 mkexpr(irt_diff), mkU8(1)
12240 mkU8(16)
12245 if (isT)
12246 putIRegT( regD, mkexpr(res_q), condT );
12247 else
12248 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12250 DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12251 return True;
12253 /* fall through */
12256 /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12258 UInt regD = 99, regN = 99, regM = 99;
12259 Bool gate = False;
12261 if (isT) {
12262 if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12263 regN = INSNT0(3,0);
12264 regD = INSNT1(11,8);
12265 regM = INSNT1(3,0);
12266 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12267 gate = True;
12269 } else {
12270 if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12271 INSNA(11,8) == BITS4(1,1,1,1) &&
12272 INSNA(7,4) == BITS4(0,1,0,1)) {
12273 regD = INSNA(15,12);
12274 regN = INSNA(19,16);
12275 regM = INSNA(3,0);
12276 if (regD != 15 && regN != 15 && regM != 15)
12277 gate = True;
12281 if (gate) {
12282 IRTemp rNt = newTemp(Ity_I32);
12283 IRTemp rMt = newTemp(Ity_I32);
12284 IRTemp irt_diff = newTemp(Ity_I32);
12285 IRTemp irt_sum = newTemp(Ity_I32);
12286 IRTemp res_q = newTemp(Ity_I32);
12288 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12289 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12291 assign( irt_sum,
12292 binop(Iop_Add32,
12293 unop(Iop_16Uto32,
12294 unop(Iop_32to16,
12295 mkexpr(rNt)
12298 unop(Iop_16Uto32,
12299 unop(Iop_32to16,
12300 binop(Iop_Shr32,
12301 mkexpr(rMt), mkU8(16)
12308 assign( irt_diff,
12309 binop(Iop_Sub32,
12310 unop(Iop_16Uto32,
12311 unop(Iop_32to16,
12312 binop(Iop_Shr32,
12313 mkexpr(rNt), mkU8(16)
12317 unop(Iop_16Uto32,
12318 unop(Iop_32to16, mkexpr(rMt)
12324 assign( res_q,
12325 binop(Iop_Or32,
12326 unop(Iop_16Uto32,
12327 unop(Iop_32to16,
12328 binop(Iop_Shr32,
12329 mkexpr(irt_sum), mkU8(1)
12333 binop(Iop_Shl32,
12334 binop(Iop_Shr32,
12335 mkexpr(irt_diff), mkU8(1)
12337 mkU8(16)
12342 if (isT)
12343 putIRegT( regD, mkexpr(res_q), condT );
12344 else
12345 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12347 DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12348 return True;
12350 /* fall through */
12353 /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
12355 UInt regD = 99, regN = 99, regM = 99;
12356 Bool gate = False;
12358 if (isT) {
12359 if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12360 regN = INSNT0(3,0);
12361 regD = INSNT1(11,8);
12362 regM = INSNT1(3,0);
12363 if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12364 gate = True;
12366 } else {
12367 if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12368 INSNA(11,8) == BITS4(1,1,1,1) &&
12369 INSNA(7,4) == BITS4(0,1,1,1)) {
12370 regD = INSNA(15,12);
12371 regN = INSNA(19,16);
12372 regM = INSNA(3,0);
12373 if (regD != 15 && regN != 15 && regM != 15)
12374 gate = True;
12378 if (gate) {
12379 IRTemp rNt = newTemp(Ity_I32);
12380 IRTemp rMt = newTemp(Ity_I32);
12381 IRTemp res_q = newTemp(Ity_I32);
12383 assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12384 assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12386 assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
12387 if (isT)
12388 putIRegT( regD, mkexpr(res_q), condT );
12389 else
12390 putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12392 DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12393 return True;
12395 /* fall through */
12398 /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
12400 UInt rD = 99, rN = 99, rM = 99, rA = 99;
12401 Bool round = False;
12402 Bool gate = False;
12404 if (isT) {
12405 if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
12406 && INSNT0(6,4) == BITS3(1,1,0)
12407 && INSNT1(7,5) == BITS3(0,0,0)) {
12408 round = INSNT1(4,4);
12409 rA = INSNT1(15,12);
12410 rD = INSNT1(11,8);
12411 rM = INSNT1(3,0);
12412 rN = INSNT0(3,0);
12413 if (!isBadRegT(rD)
12414 && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
12415 gate = True;
12417 } else {
12418 if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
12419 && INSNA(15,12) != BITS4(1,1,1,1)
12420 && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
12421 round = INSNA(5,5);
12422 rD = INSNA(19,16);
12423 rA = INSNA(15,12);
12424 rM = INSNA(11,8);
12425 rN = INSNA(3,0);
12426 if (rD != 15 && rM != 15 && rN != 15)
12427 gate = True;
12430 if (gate) {
12431 IRTemp irt_rA = newTemp(Ity_I32);
12432 IRTemp irt_rN = newTemp(Ity_I32);
12433 IRTemp irt_rM = newTemp(Ity_I32);
12434 assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
12435 assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12436 assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12437 IRExpr* res
12438 = unop(Iop_64HIto32,
12439 binop(Iop_Add64,
12440 binop(Iop_Sub64,
12441 binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
12442 binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
12443 mkU64(round ? 0x80000000ULL : 0ULL)));
12444 if (isT)
12445 putIRegT( rD, res, condT );
12446 else
12447 putIRegA(rD, res, condT, Ijk_Boring);
12448 DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
12449 round ? "r" : "", nCC(conq), rD, rN, rM, rA);
12450 return True;
12452 /* fall through */
12455 /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12457 UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12458 Bool m_swap = False;
12459 Bool gate = False;
12461 if (isT) {
12462 if (INSNT0(15,4) == 0xFBC &&
12463 (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
12464 rN = INSNT0(3,0);
12465 rDlo = INSNT1(15,12);
12466 rDhi = INSNT1(11,8);
12467 rM = INSNT1(3,0);
12468 m_swap = (INSNT1(4,4) & 1) == 1;
12469 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
12470 && !isBadRegT(rM) && rDhi != rDlo)
12471 gate = True;
12473 } else {
12474 if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
12475 && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
12476 rN = INSNA(3,0);
12477 rDlo = INSNA(15,12);
12478 rDhi = INSNA(19,16);
12479 rM = INSNA(11,8);
12480 m_swap = ( INSNA(5,5) & 1 ) == 1;
12481 if (rDlo != 15 && rDhi != 15
12482 && rN != 15 && rM != 15 && rDlo != rDhi)
12483 gate = True;
12487 if (gate) {
12488 IRTemp irt_rM = newTemp(Ity_I32);
12489 IRTemp irt_rN = newTemp(Ity_I32);
12490 IRTemp irt_rDhi = newTemp(Ity_I32);
12491 IRTemp irt_rDlo = newTemp(Ity_I32);
12492 IRTemp op_2 = newTemp(Ity_I32);
12493 IRTemp pr_1 = newTemp(Ity_I64);
12494 IRTemp pr_2 = newTemp(Ity_I64);
12495 IRTemp result = newTemp(Ity_I64);
12496 IRTemp resHi = newTemp(Ity_I32);
12497 IRTemp resLo = newTemp(Ity_I32);
12498 assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
12499 assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
12500 assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
12501 assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
12502 assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12503 assign( pr_1, binop(Iop_MullS32,
12504 unop(Iop_16Sto32,
12505 unop(Iop_32to16, mkexpr(irt_rN))
12507 unop(Iop_16Sto32,
12508 unop(Iop_32to16, mkexpr(op_2))
12512 assign( pr_2, binop(Iop_MullS32,
12513 binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12514 binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12517 assign( result, binop(Iop_Add64,
12518 binop(Iop_Add64,
12519 mkexpr(pr_1),
12520 mkexpr(pr_2)
12522 binop(Iop_32HLto64,
12523 mkexpr(irt_rDhi),
12524 mkexpr(irt_rDlo)
12528 assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12529 assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12530 if (isT) {
12531 putIRegT( rDhi, mkexpr(resHi), condT );
12532 putIRegT( rDlo, mkexpr(resLo), condT );
12533 } else {
12534 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12535 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12537 DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
12538 m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12539 return True;
12541 /* fall through */
12544 /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12546 UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12547 Bool m_swap = False;
12548 Bool gate = False;
12550 if (isT) {
12551 if ((INSNT0(15,4) == 0xFBD &&
12552 (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
12553 rN = INSNT0(3,0);
12554 rDlo = INSNT1(15,12);
12555 rDhi = INSNT1(11,8);
12556 rM = INSNT1(3,0);
12557 m_swap = (INSNT1(4,4) & 1) == 1;
12558 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
12559 !isBadRegT(rM) && rDhi != rDlo)
12560 gate = True;
12562 } else {
12563 if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
12564 (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
12565 rN = INSNA(3,0);
12566 rDlo = INSNA(15,12);
12567 rDhi = INSNA(19,16);
12568 rM = INSNA(11,8);
12569 m_swap = (INSNA(5,5) & 1) == 1;
12570 if (rDlo != 15 && rDhi != 15 &&
12571 rN != 15 && rM != 15 && rDlo != rDhi)
12572 gate = True;
12575 if (gate) {
12576 IRTemp irt_rM = newTemp(Ity_I32);
12577 IRTemp irt_rN = newTemp(Ity_I32);
12578 IRTemp irt_rDhi = newTemp(Ity_I32);
12579 IRTemp irt_rDlo = newTemp(Ity_I32);
12580 IRTemp op_2 = newTemp(Ity_I32);
12581 IRTemp pr_1 = newTemp(Ity_I64);
12582 IRTemp pr_2 = newTemp(Ity_I64);
12583 IRTemp result = newTemp(Ity_I64);
12584 IRTemp resHi = newTemp(Ity_I32);
12585 IRTemp resLo = newTemp(Ity_I32);
12586 assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12587 assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12588 assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
12589 assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
12590 assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12591 assign( pr_1, binop(Iop_MullS32,
12592 unop(Iop_16Sto32,
12593 unop(Iop_32to16, mkexpr(irt_rN))
12595 unop(Iop_16Sto32,
12596 unop(Iop_32to16, mkexpr(op_2))
12600 assign( pr_2, binop(Iop_MullS32,
12601 binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12602 binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12605 assign( result, binop(Iop_Add64,
12606 binop(Iop_Sub64,
12607 mkexpr(pr_1),
12608 mkexpr(pr_2)
12610 binop(Iop_32HLto64,
12611 mkexpr(irt_rDhi),
12612 mkexpr(irt_rDlo)
12616 assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12617 assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12618 if (isT) {
12619 putIRegT( rDhi, mkexpr(resHi), condT );
12620 putIRegT( rDlo, mkexpr(resLo), condT );
12621 } else {
12622 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12623 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12625 DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
12626 m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12627 return True;
12629 /* fall through */
12632 /* ---------- Doesn't match anything. ---------- */
12633 return False;
12635 # undef INSNA
12636 # undef INSNT0
12637 # undef INSNT1
12641 /*------------------------------------------------------------*/
12642 /*--- V8 instructions ---*/
12643 /*------------------------------------------------------------*/
12645 /* Break a V128-bit value up into four 32-bit ints. */
12647 static void breakupV128to32s ( IRTemp t128,
12648 /*OUTs*/
12649 IRTemp* t3, IRTemp* t2,
12650 IRTemp* t1, IRTemp* t0 )
12652 IRTemp hi64 = newTemp(Ity_I64);
12653 IRTemp lo64 = newTemp(Ity_I64);
12654 assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
12655 assign( lo64, unop(Iop_V128to64, mkexpr(t128)) );
12657 vassert(t0 && *t0 == IRTemp_INVALID);
12658 vassert(t1 && *t1 == IRTemp_INVALID);
12659 vassert(t2 && *t2 == IRTemp_INVALID);
12660 vassert(t3 && *t3 == IRTemp_INVALID);
12662 *t0 = newTemp(Ity_I32);
12663 *t1 = newTemp(Ity_I32);
12664 *t2 = newTemp(Ity_I32);
12665 *t3 = newTemp(Ity_I32);
12666 assign( *t0, unop(Iop_64to32, mkexpr(lo64)) );
12667 assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
12668 assign( *t2, unop(Iop_64to32, mkexpr(hi64)) );
12669 assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
12673 /* Both ARM and Thumb */
12675 /* Translate a V8 instruction. If successful, returns True and *dres
12676 may or may not be updated. If unsuccessful, returns False and
12677 doesn't change *dres nor create any IR.
12679 The Thumb and ARM encodings are potentially different. In both
12680 ARM and Thumb mode, the caller must pass the entire 32 bits of
12681 the instruction. Callers may pass any instruction; this function
12682 ignores anything it doesn't recognise.
12684 Caller must supply an IRTemp 'condT' holding the gating condition,
12685 or IRTemp_INVALID indicating the insn is always executed.
12687 If we are decoding an ARM instruction which is in the NV space
12688 then it is expected that condT will be IRTemp_INVALID, and that is
12689 asserted for. That condition is ensured by the logic near the top
12690 of disInstr_ARM_WRK, that sets up condT.
12692 When decoding for Thumb, the caller must pass the ITState pre/post
12693 this instruction, so that we can generate a SIGILL in the cases where
12694 the instruction may not be in an IT block. When decoding for ARM,
12695 both of these must be IRTemp_INVALID.
12697 Finally, the caller must indicate whether this occurs in ARM or in
12698 Thumb code.
12700 static Bool decode_V8_instruction (
12701 /*MOD*/DisResult* dres,
12702 UInt insnv8,
12703 IRTemp condT,
12704 Bool isT,
12705 IRTemp old_itstate,
12706 IRTemp new_itstate
12709 # define INSN(_bMax,_bMin) SLICE_UInt(insnv8, (_bMax), (_bMin))
12711 if (isT) {
12712 vassert(old_itstate != IRTemp_INVALID);
12713 vassert(new_itstate != IRTemp_INVALID);
12714 } else {
12715 vassert(old_itstate == IRTemp_INVALID);
12716 vassert(new_itstate == IRTemp_INVALID);
12719 /* ARMCondcode 'conq' is only used for debug printing and for no other
12720 purpose. For ARM, this is simply the top 4 bits of the instruction.
12721 For Thumb, the condition is not (really) known until run time, and so
12722 we set it to ARMCondAL in order that printing of these instructions
12723 does not show any condition. */
12724 ARMCondcode conq;
12725 if (isT) {
12726 conq = ARMCondAL;
12727 } else {
12728 conq = (ARMCondcode)INSN(31,28);
12729 if (conq == ARMCondNV || conq == ARMCondAL) {
12730 vassert(condT == IRTemp_INVALID);
12731 } else {
12732 vassert(condT != IRTemp_INVALID);
12734 vassert(conq >= ARMCondEQ && conq <= ARMCondNV);
12737 /* ----------- {AESD, AESE, AESMC, AESIMC}.8 q_q ----------- */
12738 /* 31 27 23 21 19 17 15 11 7 3
12739 T1: 1111 1111 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
12740 A1: 1111 0011 1 D 11 sz 00 d 0011 00 M 0 m AESE Qd, Qm
12742 T1: 1111 1111 1 D 11 sz 00 d 0011 01 M 0 m AESD Qd, Qm
12743 A1: 1111 0011 1 D 11 sz 00 d 0011 01 M 0 m AESD Qd, Qm
12745 T1: 1111 1111 1 D 11 sz 00 d 0011 10 M 0 m AESMC Qd, Qm
12746 A1: 1111 0011 1 D 11 sz 00 d 0011 10 M 0 m AESMC Qd, Qm
12748 T1: 1111 1111 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
12749 A1: 1111 0011 1 D 11 sz 00 d 0011 11 M 0 m AESIMC Qd, Qm
12751 sz must be 00
12752 ARM encoding is in NV space.
12753 In Thumb mode, we must not be in an IT block.
12756 UInt regD = 99, regM = 99, opc = 4/*invalid*/;
12757 Bool gate = True;
12759 UInt high9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
12760 if (INSN(31,23) == high9 && INSN(21,16) == BITS6(1,1,0,0,0,0)
12761 && INSN(11,8) == BITS4(0,0,1,1) && INSN(4,4) == 0) {
12762 UInt bitD = INSN(22,22);
12763 UInt fldD = INSN(15,12);
12764 UInt bitM = INSN(5,5);
12765 UInt fldM = INSN(3,0);
12766 opc = INSN(7,6);
12767 regD = (bitD << 4) | fldD;
12768 regM = (bitM << 4) | fldM;
12770 if ((regD & 1) == 1 || (regM & 1) == 1)
12771 gate = False;
12773 if (gate) {
12774 if (isT) {
12775 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
12777 /* In ARM mode, this is statically unconditional. In Thumb mode,
12778 this must be dynamically unconditional, and we've SIGILLd if not.
12779 In either case we can create unconditional IR. */
12780 IRTemp op1 = newTemp(Ity_V128);
12781 IRTemp op2 = newTemp(Ity_V128);
12782 IRTemp src = newTemp(Ity_V128);
12783 IRTemp res = newTemp(Ity_V128);
12784 assign(op1, getQReg(regD >> 1));
12785 assign(op2, getQReg(regM >> 1));
12786 assign(src, opc == BITS2(0,0) || opc == BITS2(0,1)
12787 ? binop(Iop_XorV128, mkexpr(op1), mkexpr(op2))
12788 : mkexpr(op2));
12790 void* helpers[4]
12791 = { &armg_dirtyhelper_AESE, &armg_dirtyhelper_AESD,
12792 &armg_dirtyhelper_AESMC, &armg_dirtyhelper_AESIMC };
12793 const HChar* hNames[4]
12794 = { "armg_dirtyhelper_AESE", "armg_dirtyhelper_AESD",
12795 "armg_dirtyhelper_AESMC", "armg_dirtyhelper_AESIMC" };
12796 const HChar* iNames[4]
12797 = { "aese", "aesd", "aesmc", "aesimc" };
12799 vassert(opc >= 0 && opc <= 3);
12800 void* helper = helpers[opc];
12801 const HChar* hname = hNames[opc];
12803 IRTemp w32_3, w32_2, w32_1, w32_0;
12804 w32_3 = w32_2 = w32_1 = w32_0 = IRTemp_INVALID;
12805 breakupV128to32s( src, &w32_3, &w32_2, &w32_1, &w32_0 );
12807 IRDirty* di
12808 = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
12809 mkIRExprVec_5(
12810 IRExpr_VECRET(),
12811 mkexpr(w32_3), mkexpr(w32_2),
12812 mkexpr(w32_1), mkexpr(w32_0)) );
12813 stmt(IRStmt_Dirty(di));
12815 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
12816 DIP("%s.8 q%u, q%u\n", iNames[opc], regD >> 1, regM >> 1);
12817 return True;
12819 /* fall through */
12822 /* ----------- SHA 3-reg insns q_q_q ----------- */
12824 31 27 23 19 15 11 7 3
12825 T1: 1110 1111 0 D 00 n d 1100 N Q M 0 m SHA1C Qd, Qn, Qm ix=0
12826 A1: 1111 0010 ----------------------------
12828 T1: 1110 1111 0 D 01 n d 1100 N Q M 0 m SHA1P Qd, Qn, Qm ix=1
12829 A1: 1111 0010 ----------------------------
12831 T1: 1110 1111 0 D 10 n d 1100 N Q M 0 m SHA1M Qd, Qn, Qm ix=2
12832 A1: 1111 0010 ----------------------------
12834 T1: 1110 1111 0 D 11 n d 1100 N Q M 0 m SHA1SU0 Qd, Qn, Qm ix=3
12835 A1: 1111 0010 ----------------------------
12836 (that's a complete set of 4, based on insn[21,20])
12838 T1: 1111 1111 0 D 00 n d 1100 N Q M 0 m SHA256H Qd, Qn, Qm ix=4
12839 A1: 1111 0011 ----------------------------
12841 T1: 1111 1111 0 D 01 n d 1100 N Q M 0 m SHA256H2 Qd, Qn, Qm ix=5
12842 A1: 1111 0011 ----------------------------
12844 T1: 1111 1111 0 D 10 n d 1100 N Q M 0 m SHA256SU1 Qd, Qn, Qm ix=6
12845 A1: 1111 0011 ----------------------------
12846 (3/4 of a complete set of 4, based on insn[21,20])
12848 Q must be 1. Same comments about conditionalisation as for the AES
12849 group above apply.
12852 UInt ix = 8; /* invalid */
12853 Bool gate = False;
12855 UInt hi9_sha1 = isT ? BITS9(1,1,1,0,1,1,1,1,0)
12856 : BITS9(1,1,1,1,0,0,1,0,0);
12857 UInt hi9_sha256 = isT ? BITS9(1,1,1,1,1,1,1,1,0)
12858 : BITS9(1,1,1,1,0,0,1,1,0);
12859 if ((INSN(31,23) == hi9_sha1 || INSN(31,23) == hi9_sha256)
12860 && INSN(11,8) == BITS4(1,1,0,0)
12861 && INSN(6,6) == 1 && INSN(4,4) == 0) {
12862 ix = INSN(21,20);
12863 if (INSN(31,23) == hi9_sha256)
12864 ix |= 4;
12865 if (ix < 7)
12866 gate = True;
12869 UInt regN = (INSN(7,7) << 4) | INSN(19,16);
12870 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
12871 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
12872 if ((regD & 1) == 1 || (regM & 1) == 1 || (regN & 1) == 1)
12873 gate = False;
12875 if (gate) {
12876 vassert(ix >= 0 && ix < 7);
12877 const HChar* inames[7]
12878 = { "sha1c", "sha1p", "sha1m", "sha1su0",
12879 "sha256h", "sha256h2", "sha256su1" };
12880 void(*helpers[7])(V128*,UInt,UInt,UInt,UInt,UInt,UInt,
12881 UInt,UInt,UInt,UInt,UInt,UInt)
12882 = { &armg_dirtyhelper_SHA1C, &armg_dirtyhelper_SHA1P,
12883 &armg_dirtyhelper_SHA1M, &armg_dirtyhelper_SHA1SU0,
12884 &armg_dirtyhelper_SHA256H, &armg_dirtyhelper_SHA256H2,
12885 &armg_dirtyhelper_SHA256SU1 };
12886 const HChar* hnames[7]
12887 = { "armg_dirtyhelper_SHA1C", "armg_dirtyhelper_SHA1P",
12888 "armg_dirtyhelper_SHA1M", "armg_dirtyhelper_SHA1SU0",
12889 "armg_dirtyhelper_SHA256H", "armg_dirtyhelper_SHA256H2",
12890 "armg_dirtyhelper_SHA256SU1" };
12892 /* This is a really lame way to implement this, even worse than
12893 the arm64 version. But at least it works. */
12895 if (isT) {
12896 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
12899 IRTemp vD = newTemp(Ity_V128);
12900 IRTemp vN = newTemp(Ity_V128);
12901 IRTemp vM = newTemp(Ity_V128);
12902 assign(vD, getQReg(regD >> 1));
12903 assign(vN, getQReg(regN >> 1));
12904 assign(vM, getQReg(regM >> 1));
12906 IRTemp d32_3, d32_2, d32_1, d32_0;
12907 d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
12908 breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
12910 IRTemp n32_3_pre, n32_2_pre, n32_1_pre, n32_0_pre;
12911 n32_3_pre = n32_2_pre = n32_1_pre = n32_0_pre = IRTemp_INVALID;
12912 breakupV128to32s( vN, &n32_3_pre, &n32_2_pre, &n32_1_pre, &n32_0_pre );
12914 IRTemp m32_3, m32_2, m32_1, m32_0;
12915 m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
12916 breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
12918 IRTemp n32_3 = newTemp(Ity_I32);
12919 IRTemp n32_2 = newTemp(Ity_I32);
12920 IRTemp n32_1 = newTemp(Ity_I32);
12921 IRTemp n32_0 = newTemp(Ity_I32);
12923 /* Mask off any bits of the N register operand that aren't actually
12924 needed, so that Memcheck doesn't complain unnecessarily. */
12925 switch (ix) {
12926 case 0: case 1: case 2:
12927 assign(n32_3, mkU32(0));
12928 assign(n32_2, mkU32(0));
12929 assign(n32_1, mkU32(0));
12930 assign(n32_0, mkexpr(n32_0_pre));
12931 break;
12932 case 3: case 4: case 5: case 6:
12933 assign(n32_3, mkexpr(n32_3_pre));
12934 assign(n32_2, mkexpr(n32_2_pre));
12935 assign(n32_1, mkexpr(n32_1_pre));
12936 assign(n32_0, mkexpr(n32_0_pre));
12937 break;
12938 default:
12939 vassert(0);
12942 IRExpr** argvec
12943 = mkIRExprVec_13(
12944 IRExpr_VECRET(),
12945 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
12946 mkexpr(n32_3), mkexpr(n32_2), mkexpr(n32_1), mkexpr(n32_0),
12947 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
12950 IRTemp res = newTemp(Ity_V128);
12951 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
12952 hnames[ix], helpers[ix], argvec );
12953 stmt(IRStmt_Dirty(di));
12954 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
12956 DIP("%s.8 q%u, q%u, q%u\n",
12957 inames[ix], regD >> 1, regN >> 1, regM >> 1);
12958 return True;
12960 /* fall through */
12963 /* ----------- SHA1SU1, SHA256SU0 ----------- */
12965 31 27 23 21 19 15 11 7 3
12966 T1: 1111 1111 1 D 11 1010 d 0011 10 M 0 m SHA1SU1 Qd, Qm
12967 A1: 1111 0011 ----------------------------
12969 T1: 1111 1111 1 D 11 1010 d 0011 11 M 0 m SHA256SU0 Qd, Qm
12970 A1: 1111 0011 ----------------------------
12972 Same comments about conditionalisation as for the AES group above apply.
12975 Bool gate = False;
12977 UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
12978 if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,1,0)
12979 && INSN(11,7) == BITS5(0,0,1,1,1) && INSN(4,4) == 0) {
12980 gate = True;
12983 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
12984 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
12985 if ((regD & 1) == 1 || (regM & 1) == 1)
12986 gate = False;
12988 Bool is_1SU1 = INSN(6,6) == 0;
12990 if (gate) {
12991 const HChar* iname
12992 = is_1SU1 ? "sha1su1" : "sha256su0";
12993 void (*helper)(V128*,UInt,UInt,UInt,UInt,UInt,UInt,UInt,UInt)
12994 = is_1SU1 ? &armg_dirtyhelper_SHA1SU1
12995 : *armg_dirtyhelper_SHA256SU0;
12996 const HChar* hname
12997 = is_1SU1 ? "armg_dirtyhelper_SHA1SU1"
12998 : "armg_dirtyhelper_SHA256SU0";
13000 if (isT) {
13001 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13004 IRTemp vD = newTemp(Ity_V128);
13005 IRTemp vM = newTemp(Ity_V128);
13006 assign(vD, getQReg(regD >> 1));
13007 assign(vM, getQReg(regM >> 1));
13009 IRTemp d32_3, d32_2, d32_1, d32_0;
13010 d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
13011 breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
13013 IRTemp m32_3, m32_2, m32_1, m32_0;
13014 m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
13015 breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
13017 IRExpr** argvec
13018 = mkIRExprVec_9(
13019 IRExpr_VECRET(),
13020 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
13021 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
13024 IRTemp res = newTemp(Ity_V128);
13025 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13026 hname, helper, argvec );
13027 stmt(IRStmt_Dirty(di));
13028 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13030 DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
13031 return True;
13033 /* fall through */
13036 /* ----------- SHA1H ----------- */
13038 31 27 23 21 19 15 11 7 3
13039 T1: 1111 1111 1 D 11 1001 d 0010 11 M 0 m SHA1H Qd, Qm
13040 A1: 1111 0011 ----------------------------
13042 Same comments about conditionalisation as for the AES group above apply.
13045 Bool gate = False;
13047 UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
13048 if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,0,1)
13049 && INSN(11,6) == BITS6(0,0,1,0,1,1) && INSN(4,4) == 0) {
13050 gate = True;
13053 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
13054 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
13055 if ((regD & 1) == 1 || (regM & 1) == 1)
13056 gate = False;
13058 if (gate) {
13059 const HChar* iname = "sha1h";
13060 void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_SHA1H;
13061 const HChar* hname = "armg_dirtyhelper_SHA1H";
13063 if (isT) {
13064 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13067 IRTemp vM = newTemp(Ity_V128);
13068 assign(vM, getQReg(regM >> 1));
13070 IRTemp m32_3, m32_2, m32_1, m32_0;
13071 m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
13072 breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
13073 /* m32_3, m32_2, m32_1 are just abandoned. No harm; iropt will
13074 remove them. */
13076 IRExpr* zero = mkU32(0);
13077 IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
13078 zero, zero, zero, mkexpr(m32_0));
13080 IRTemp res = newTemp(Ity_V128);
13081 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13082 hname, helper, argvec );
13083 stmt(IRStmt_Dirty(di));
13084 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13086 DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
13087 return True;
13089 /* fall through */
13092 /* ----------- VMULL.P64 ----------- */
13094 31 27 23 21 19 15 11 7 3
13095 T2: 1110 1111 1 D 10 n d 1110 N 0 M 0 m
13096 A2: 1111 0010 -------------------------
13098 The ARM documentation is pretty difficult to follow here.
13099 Same comments about conditionalisation as for the AES group above apply.
13102 Bool gate = False;
13104 UInt hi9 = isT ? BITS9(1,1,1,0,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,0,1);
13105 if (INSN(31,23) == hi9 && INSN(21,20) == BITS2(1,0)
13106 && INSN(11,8) == BITS4(1,1,1,0)
13107 && INSN(6,6) == 0 && INSN(4,4) == 0) {
13108 gate = True;
13111 UInt regN = (INSN(7,7) << 4) | INSN(19,16);
13112 UInt regD = (INSN(22,22) << 4) | INSN(15,12);
13113 UInt regM = (INSN(5,5) << 4) | INSN(3,0);
13115 if ((regD & 1) == 1)
13116 gate = False;
13118 if (gate) {
13119 const HChar* iname = "vmull";
13120 void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_VMULLP64;
13121 const HChar* hname = "armg_dirtyhelper_VMULLP64";
13123 if (isT) {
13124 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13127 IRTemp srcN = newTemp(Ity_I64);
13128 IRTemp srcM = newTemp(Ity_I64);
13129 assign(srcN, getDRegI64(regN));
13130 assign(srcM, getDRegI64(regM));
13132 IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
13133 unop(Iop_64HIto32, mkexpr(srcN)),
13134 unop(Iop_64to32, mkexpr(srcN)),
13135 unop(Iop_64HIto32, mkexpr(srcM)),
13136 unop(Iop_64to32, mkexpr(srcM)));
13138 IRTemp res = newTemp(Ity_V128);
13139 IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13140 hname, helper, argvec );
13141 stmt(IRStmt_Dirty(di));
13142 putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13144 DIP("%s.p64 q%u, q%u, w%u\n", iname, regD >> 1, regN, regM);
13145 return True;
13147 /* fall through */
13150 /* ----------- LDA{,B,H}, STL{,B,H} ----------- */
13151 /* 31 27 23 19 15 11 7 3
13152 A1: cond 0001 1001 n t 1100 1001 1111 LDA Rt, [Rn]
13153 A1: cond 0001 1111 n t 1100 1001 1111 LDAH Rt, [Rn]
13154 A1: cond 0001 1101 n t 1100 1001 1111 LDAB Rt, [Rn]
13156 A1: cond 0001 1000 n 1111 1100 1001 t STL Rt, [Rn]
13157 A1: cond 0001 1110 n 1111 1100 1001 t STLH Rt, [Rn]
13158 A1: cond 0001 1100 n 1111 1100 1001 t STLB Rt, [Rn]
13160 T1: 1110 1000 1101 n t 1111 1010 1111 LDA Rt, [Rn]
13161 T1: 1110 1000 1101 n t 1111 1001 1111 LDAH Rt, [Rn]
13162 T1: 1110 1000 1101 n t 1111 1000 1111 LDAB Rt, [Rn]
13164 T1: 1110 1000 1100 n t 1111 1010 1111 STL Rt, [Rn]
13165 T1: 1110 1000 1100 n t 1111 1001 1111 STLH Rt, [Rn]
13166 T1: 1110 1000 1100 n t 1111 1000 1111 STLB Rt, [Rn]
13169 UInt nn = 16; // invalid
13170 UInt tt = 16; // invalid
13171 UInt szBlg2 = 4; // invalid
13172 Bool isLoad = False;
13173 Bool gate = False;
13174 if (isT) {
13175 if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
13176 && INSN(11,6) == BITS6(1,1,1,1,1,0)
13177 && INSN(3,0) == BITS4(1,1,1,1)) {
13178 nn = INSN(19,16);
13179 tt = INSN(15,12);
13180 isLoad = INSN(20,20) == 1;
13181 szBlg2 = INSN(5,4); // 00:B 01:H 10:W 11:invalid
13182 gate = szBlg2 != BITS2(1,1) && tt != 15 && nn != 15;
13184 } else {
13185 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
13186 && INSN(11,0) == BITS12(1,1,0,0,1,0,0,1,1,1,1,1)) {
13187 nn = INSN(19,16);
13188 tt = INSN(15,12);
13189 isLoad = True;
13190 szBlg2 = INSN(22,21); // 10:B 11:H 00:W 01:invalid
13191 gate = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
13193 else
13194 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
13195 && INSN(15,4) == BITS12(1,1,1,1,1,1,0,0,1,0,0,1)) {
13196 nn = INSN(19,16);
13197 tt = INSN(3,0);
13198 isLoad = False;
13199 szBlg2 = INSN(22,21); // 10:B 11:H 00:W 01:invalid
13200 gate = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
13202 if (gate) {
13203 // Rearrange szBlg2 bits to be the same as the Thumb case
13204 switch (szBlg2) {
13205 case 2: szBlg2 = 0; break;
13206 case 3: szBlg2 = 1; break;
13207 case 0: szBlg2 = 2; break;
13208 default: /*NOTREACHED*/vassert(0);
13212 // For both encodings, the instruction is guarded by condT, which
13213 // is passed in by the caller. Note that the loads and stores
13214 // are conditional, so we don't have to truncate the IRSB at this
13215 // point, but the fence is unconditional. There's no way to
13216 // represent a conditional fence without a side exit, but it
13217 // doesn't matter from a correctness standpoint that it is
13218 // unconditional -- it just loses a bit of performance in the
13219 // case where the condition doesn't hold.
13220 if (gate) {
13221 vassert(szBlg2 <= 2 && nn <= 14 && tt <= 14);
13222 IRExpr* ea = llGetIReg(nn);
13223 if (isLoad) {
13224 static IRLoadGOp cvt[3]
13225 = { ILGop_8Uto32, ILGop_16Uto32, ILGop_Ident32 };
13226 IRTemp data = newTemp(Ity_I32);
13227 loadGuardedLE(data, cvt[szBlg2], ea, mkU32(0)/*alt*/, condT);
13228 if (isT) {
13229 putIRegT(tt, mkexpr(data), condT);
13230 } else {
13231 putIRegA(tt, mkexpr(data), condT, Ijk_INVALID);
13233 stmt(IRStmt_MBE(Imbe_Fence));
13234 } else {
13235 stmt(IRStmt_MBE(Imbe_Fence));
13236 IRExpr* data = llGetIReg(tt);
13237 switch (szBlg2) {
13238 case 0: data = unop(Iop_32to8, data); break;
13239 case 1: data = unop(Iop_32to16, data); break;
13240 case 2: break;
13241 default: vassert(0);
13243 storeGuardedLE(ea, data, condT);
13245 const HChar* ldNames[3] = { "ldab", "ldah", "lda" };
13246 const HChar* stNames[3] = { "stlb", "stlh", "stl" };
13247 DIP("%s r%u, [r%u]", (isLoad ? ldNames : stNames)[szBlg2], tt, nn);
13248 return True;
13250 /* else fall through */
13253 /* ----------- LDAEX{,B,H,D}, STLEX{,B,H,D} ----------- */
13254 /* 31 27 23 19 15 11 7 3
13255 A1: cond 0001 1101 n t 1110 1001 1111 LDAEXB Rt, [Rn]
13256 A1: cond 0001 1111 n t 1110 1001 1111 LDAEXH Rt, [Rn]
13257 A1: cond 0001 1001 n t 1110 1001 1111 LDAEX Rt, [Rn]
13258 A1: cond 0001 1011 n t 1110 1001 1111 LDAEXD Rt, Rt+1, [Rn]
13260 A1: cond 0001 1100 n d 1110 1001 t STLEXB Rd, Rt, [Rn]
13261 A1: cond 0001 1110 n d 1110 1001 t STLEXH Rd, Rt, [Rn]
13262 A1: cond 0001 1000 n d 1110 1001 t STLEX Rd, Rt, [Rn]
13263 A1: cond 0001 1010 n d 1110 1001 t STLEXD Rd, Rt, Rt+1, [Rn]
13265 31 28 24 19 15 11 7 3
13266 T1: 111 0100 01101 n t 1111 1100 1111 LDAEXB Rt, [Rn]
13267 T1: 111 0100 01101 n t 1111 1101 1111 LDAEXH Rt, [Rn]
13268 T1: 111 0100 01101 n t 1111 1110 1111 LDAEX Rt, [Rn]
13269 T1: 111 0100 01101 n t t2 1111 1111 LDAEXD Rt, Rt2, [Rn]
13271 T1: 111 0100 01100 n t 1111 1100 d STLEXB Rd, Rt, [Rn]
13272 T1: 111 0100 01100 n t 1111 1101 d STLEXH Rd, Rt, [Rn]
13273 T1: 111 0100 01100 n t 1111 1110 d STLEX Rd, Rt, [Rn]
13274 T1: 111 0100 01100 n t t2 1111 d STLEXD Rd, Rt, Rt2, [Rn]
13277 UInt nn = 16; // invalid
13278 UInt tt = 16; // invalid
13279 UInt tt2 = 16; // invalid
13280 UInt dd = 16; // invalid
13281 UInt szBlg2 = 4; // invalid
13282 Bool isLoad = False;
13283 Bool gate = False;
13284 if (isT) {
13285 if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
13286 && INSN(7,6) == BITS2(1,1)) {
13287 isLoad = INSN(20,20) == 1;
13288 nn = INSN(19,16);
13289 tt = INSN(15,12);
13290 tt2 = INSN(11,8);
13291 szBlg2 = INSN(5,4);
13292 dd = INSN(3,0);
13293 gate = True;
13294 if (szBlg2 < BITS2(1,1) && tt2 != BITS4(1,1,1,1)) gate = False;
13295 if (isLoad && dd != BITS4(1,1,1,1)) gate = False;
13296 // re-set not-used register values to invalid
13297 if (szBlg2 < BITS2(1,1)) tt2 = 16;
13298 if (isLoad) dd = 16;
13300 } else {
13301 /* ARM encoding. Do the load and store cases separately as
13302 the register numbers are in different places and a combined decode
13303 is too confusing. */
13304 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
13305 && INSN(11,0) == BITS12(1,1,1,0,1,0,0,1,1,1,1,1)) {
13306 szBlg2 = INSN(22,21);
13307 isLoad = True;
13308 nn = INSN(19,16);
13309 tt = INSN(15,12);
13310 gate = True;
13312 else
13313 if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
13314 && INSN(11,4) == BITS8(1,1,1,0,1,0,0,1)) {
13315 szBlg2 = INSN(22,21);
13316 isLoad = False;
13317 nn = INSN(19,16);
13318 dd = INSN(15,12);
13319 tt = INSN(3,0);
13320 gate = True;
13322 if (gate) {
13323 // Rearrange szBlg2 bits to be the same as the Thumb case
13324 switch (szBlg2) {
13325 case 2: szBlg2 = 0; break;
13326 case 3: szBlg2 = 1; break;
13327 case 0: szBlg2 = 2; break;
13328 case 1: szBlg2 = 3; break;
13329 default: /*NOTREACHED*/vassert(0);
13333 // Perform further checks on register numbers
13334 if (gate) {
13335 /**/ if (isT && isLoad) {
13336 // Thumb load
13337 if (szBlg2 < 3) {
13338 if (! (tt != 13 && tt != 15 && nn != 15)) gate = False;
13339 } else {
13340 if (! (tt != 13 && tt != 15 && tt2 != 13 && tt2 != 15
13341 && tt != tt2 && nn != 15)) gate = False;
13344 else if (isT && !isLoad) {
13345 // Thumb store
13346 if (szBlg2 < 3) {
13347 if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
13348 && nn != 15 && dd != nn && dd != tt)) gate = False;
13349 } else {
13350 if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
13351 && tt2 != 13 && tt2 != 15 && nn != 15 && dd != nn
13352 && dd != tt && dd != tt2)) gate = False;
13355 else if (!isT && isLoad) {
13356 // ARM Load
13357 if (szBlg2 < 3) {
13358 if (! (tt != 15 && nn != 15)) gate = False;
13359 } else {
13360 if (! ((tt & 1) == 0 && tt != 14 && nn != 15)) gate = False;
13361 vassert(tt2 == 16/*invalid*/);
13362 tt2 = tt + 1;
13365 else if (!isT && !isLoad) {
13366 // ARM Store
13367 if (szBlg2 < 3) {
13368 if (! (dd != 15 && tt != 15 && nn != 15
13369 && dd != nn && dd != tt)) gate = False;
13370 } else {
13371 if (! (dd != 15 && (tt & 1) == 0 && tt != 14 && nn != 15
13372 && dd != nn && dd != tt && dd != tt+1)) gate = False;
13373 vassert(tt2 == 16/*invalid*/);
13374 tt2 = tt + 1;
13377 else /*NOTREACHED*/vassert(0);
13379 if (gate) {
13380 // Paranoia ..
13381 vassert(szBlg2 <= 3);
13382 if (szBlg2 < 3) { vassert(tt2 == 16/*invalid*/); }
13383 else { vassert(tt2 <= 14); }
13384 if (isLoad) { vassert(dd == 16/*invalid*/); }
13385 else { vassert(dd <= 14); }
13387 // If we're still good even after all that, generate the IR.
13388 if (gate) {
13389 /* First, go unconditional. Staying in-line is too complex. */
13390 if (isT) {
13391 vassert(condT != IRTemp_INVALID);
13392 mk_skip_over_T32_if_cond_is_false( condT );
13393 } else {
13394 if (condT != IRTemp_INVALID) {
13395 mk_skip_over_A32_if_cond_is_false( condT );
13396 condT = IRTemp_INVALID;
13399 /* Now the load or store. */
13400 IRType ty = Ity_INVALID; /* the type of the transferred data */
13401 const HChar* nm = NULL;
13402 switch (szBlg2) {
13403 case 0: nm = "b"; ty = Ity_I8; break;
13404 case 1: nm = "h"; ty = Ity_I16; break;
13405 case 2: nm = ""; ty = Ity_I32; break;
13406 case 3: nm = "d"; ty = Ity_I64; break;
13407 default: vassert(0);
13409 IRExpr* ea = isT ? getIRegT(nn) : getIRegA(nn);
13410 if (isLoad) {
13411 // LOAD. Transaction, then fence.
13412 IROp widen = Iop_INVALID;
13413 switch (szBlg2) {
13414 case 0: widen = Iop_8Uto32; break;
13415 case 1: widen = Iop_16Uto32; break;
13416 case 2: case 3: break;
13417 default: vassert(0);
13419 IRTemp res = newTemp(ty);
13420 // FIXME: assumes little-endian guest
13421 stmt( IRStmt_LLSC(Iend_LE, res, ea, NULL/*this is a load*/) );
13423 # define PUT_IREG(_nnz, _eez) \
13424 do { vassert((_nnz) <= 14); /* no writes to the PC */ \
13425 if (isT) { putIRegT((_nnz), (_eez), IRTemp_INVALID); } \
13426 else { putIRegA((_nnz), (_eez), \
13427 IRTemp_INVALID, Ijk_Boring); } } while(0)
13428 if (ty == Ity_I64) {
13429 // FIXME: assumes little-endian guest
13430 PUT_IREG(tt, unop(Iop_64to32, mkexpr(res)));
13431 PUT_IREG(tt2, unop(Iop_64HIto32, mkexpr(res)));
13432 } else {
13433 PUT_IREG(tt, widen == Iop_INVALID
13434 ? mkexpr(res) : unop(widen, mkexpr(res)));
13436 stmt(IRStmt_MBE(Imbe_Fence));
13437 if (ty == Ity_I64) {
13438 DIP("ldrex%s%s r%u, r%u, [r%u]\n",
13439 nm, isT ? "" : nCC(conq), tt, tt2, nn);
13440 } else {
13441 DIP("ldrex%s%s r%u, [r%u]\n", nm, isT ? "" : nCC(conq), tt, nn);
13443 # undef PUT_IREG
13444 } else {
13445 // STORE. Fence, then transaction.
13446 IRTemp resSC1, resSC32, data;
13447 IROp narrow = Iop_INVALID;
13448 switch (szBlg2) {
13449 case 0: narrow = Iop_32to8; break;
13450 case 1: narrow = Iop_32to16; break;
13451 case 2: case 3: break;
13452 default: vassert(0);
13454 stmt(IRStmt_MBE(Imbe_Fence));
13455 data = newTemp(ty);
13456 # define GET_IREG(_nnz) (isT ? getIRegT(_nnz) : getIRegA(_nnz))
13457 assign(data,
13458 ty == Ity_I64
13459 // FIXME: assumes little-endian guest
13460 ? binop(Iop_32HLto64, GET_IREG(tt2), GET_IREG(tt))
13461 : narrow == Iop_INVALID
13462 ? GET_IREG(tt)
13463 : unop(narrow, GET_IREG(tt)));
13464 # undef GET_IREG
13465 resSC1 = newTemp(Ity_I1);
13466 // FIXME: assumes little-endian guest
13467 stmt( IRStmt_LLSC(Iend_LE, resSC1, ea, mkexpr(data)) );
13469 /* Set rDD to 1 on failure, 0 on success. Currently we have
13470 resSC1 == 0 on failure, 1 on success. */
13471 resSC32 = newTemp(Ity_I32);
13472 assign(resSC32,
13473 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
13474 vassert(dd <= 14); /* no writes to the PC */
13475 if (isT) {
13476 putIRegT(dd, mkexpr(resSC32), IRTemp_INVALID);
13477 } else {
13478 putIRegA(dd, mkexpr(resSC32), IRTemp_INVALID, Ijk_Boring);
13480 if (ty == Ity_I64) {
13481 DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
13482 nm, isT ? "" : nCC(conq), dd, tt, tt2, nn);
13483 } else {
13484 DIP("strex%s%s r%u, r%u, [r%u]\n",
13485 nm, isT ? "" : nCC(conq), dd, tt, nn);
13487 } /* if (isLoad) */
13488 return True;
13489 } /* if (gate) */
13490 /* else fall through */
13493 /* ----------- VSEL<c>.F64 d_d_d, VSEL<c>.F32 s_s_s ----------- */
13494 /* 31 27 22 21 19 15 11 8 7 6 5 4 3
13495 T1/A1: 1111 11100 D cc n d 101 1 N 0 M 0 m VSEL<c>.F64 Dd, Dn, Dm
13496 T1/A1: 1111 11100 D cc n d 101 0 N 0 M 0 m VSEL<c>.F32 Sd, Sn, Sm
13498 ARM encoding is in NV space.
13499 In Thumb mode, we must not be in an IT block.
13501 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,0) && INSN(11,9) == BITS3(1,0,1)
13502 && INSN(6,6) == 0 && INSN(4,4) == 0) {
13503 UInt bit_D = INSN(22,22);
13504 UInt fld_cc = INSN(21,20);
13505 UInt fld_n = INSN(19,16);
13506 UInt fld_d = INSN(15,12);
13507 Bool isF64 = INSN(8,8) == 1;
13508 UInt bit_N = INSN(7,7);
13509 UInt bit_M = INSN(5,5);
13510 UInt fld_m = INSN(3,0);
13512 UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D);
13513 UInt nn = isF64 ? ((bit_N << 4) | fld_n) : ((fld_n << 1) | bit_N);
13514 UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
13516 UInt cc_1 = (fld_cc >> 1) & 1;
13517 UInt cc_0 = (fld_cc >> 0) & 1;
13518 UInt cond = (fld_cc << 2) | ((cc_1 ^ cc_0) << 1) | 0;
13520 if (isT) {
13521 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13523 /* In ARM mode, this is statically unconditional. In Thumb mode,
13524 this must be dynamically unconditional, and we've SIGILLd if not.
13525 In either case we can create unconditional IR. */
13527 IRTemp guard = newTemp(Ity_I32);
13528 assign(guard, mk_armg_calculate_condition(cond));
13529 IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
13530 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13531 IRExpr* res = IRExpr_ITE(unop(Iop_32to1, mkexpr(guard)), srcN, srcM);
13532 (isF64 ? llPutDReg : llPutFReg)(dd, res);
13534 UChar rch = isF64 ? 'd' : 'f';
13535 DIP("vsel%s.%s %c%u, %c%u, %c%u\n",
13536 nCC(cond), isF64 ? "f64" : "f32", rch, dd, rch, nn, rch, mm);
13537 return True;
13540 /* -------- VRINT{A,N,P,M}.F64 d_d, VRINT{A,N,P,M}.F32 s_s -------- */
13541 /* 31 22 21 17 15 11 8 7 5 4 3
13542 T1/A1: 111111101 D 1110 rm Vd 101 1 01 M 0 Vm VRINT{A,N,P,M}.F64 Dd, Dm
13543 T1/A1: 111111101 D 1110 rm Vd 101 0 01 M 0 Vm VRINT{A,N,P,M}.F32 Sd, Sm
13545 ARM encoding is in NV space.
13546 In Thumb mode, we must not be in an IT block.
13548 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1)
13549 && INSN(21,18) == BITS4(1,1,1,0) && INSN(11,9) == BITS3(1,0,1)
13550 && INSN(7,6) == BITS2(0,1) && INSN(4,4) == 0) {
13551 UInt bit_D = INSN(22,22);
13552 UInt fld_rm = INSN(17,16);
13553 UInt fld_d = INSN(15,12);
13554 Bool isF64 = INSN(8,8) == 1;
13555 UInt bit_M = INSN(5,5);
13556 UInt fld_m = INSN(3,0);
13558 UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D);
13559 UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
13561 if (isT) {
13562 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13564 /* In ARM mode, this is statically unconditional. In Thumb mode,
13565 this must be dynamically unconditional, and we've SIGILLd if not.
13566 In either case we can create unconditional IR. */
13568 UChar c = '?';
13569 IRRoundingMode rm = Irrm_NEAREST;
13570 switch (fld_rm) {
13571 /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13572 kludge since it doesn't take into account the nearest-even vs
13573 nearest-away semantics. */
13574 case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
13575 case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
13576 case BITS2(1,0): c = 'p'; rm = Irrm_PosINF; break;
13577 case BITS2(1,1): c = 'm'; rm = Irrm_NegINF; break;
13578 default: vassert(0);
13581 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13582 IRExpr* res = binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13583 mkU32((UInt)rm), srcM);
13584 (isF64 ? llPutDReg : llPutFReg)(dd, res);
13586 UChar rch = isF64 ? 'd' : 'f';
13587 DIP("vrint%c.%s.%s %c%u, %c%u\n",
13588 c, isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13589 return True;
13592 /* -------- VRINT{Z,R}.F64.F64 d_d, VRINT{Z,R}.F32.F32 s_s -------- */
13593 /* 31 27 22 21 15 11 7 6 5 4 3
13594 T1: 1110 11101 D 110110 Vd 1011 op 1 M 0 Vm VRINT<r><c>.F64.F64 Dd, Dm
13595 A1: cond 11101 D 110110 Vd 1011 op 1 M 0 Vm
13597 T1: 1110 11101 D 110110 Vd 1010 op 1 M 0 Vm VRINT<r><c>.F32.F32 Sd, Sm
13598 A1: cond 11101 D 110110 Vd 1010 op 1 M 0 Vm
13600 In contrast to the VRINT variants just above, this can be conditional.
13602 if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
13603 && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,0)
13604 && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) {
13605 UInt bit_D = INSN(22,22);
13606 UInt fld_Vd = INSN(15,12);
13607 Bool isF64 = INSN(8,8) == 1;
13608 Bool rToZero = INSN(7,7) == 1;
13609 UInt bit_M = INSN(5,5);
13610 UInt fld_Vm = INSN(3,0);
13611 UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13612 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13614 if (isT) vassert(condT != IRTemp_INVALID);
13615 IRType ty = isF64 ? Ity_F64 : Ity_F32;
13616 IRTemp src = newTemp(ty);
13617 IRTemp res = newTemp(ty);
13618 assign(src, (isF64 ? getDReg : getFReg)(mm));
13620 IRTemp rm = newTemp(Ity_I32);
13621 assign(rm, rToZero ? mkU32(Irrm_ZERO)
13622 : mkexpr(mk_get_IR_rounding_mode()));
13623 assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13624 mkexpr(rm), mkexpr(src)));
13625 (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT);
13627 UChar rch = isF64 ? 'd' : 'f';
13628 DIP("vrint%c.%s.%s %c%u, %c%u\n",
13629 rToZero ? 'z' : 'r',
13630 isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13631 return True;
13634 /* ----------- VCVT{A,N,P,M}{.S32,.U32}{.F64,.F32} ----------- */
13635 /* 31 27 22 21 17 15 11 8 7 6 5 4 3
13636 T1/A1: 1111 11101 D 1111 rm Vd 101 sz op 1 M 0 Vm
13637 VCVT{A,N,P,M}{.S32,.U32}.F64 Sd, Dm
13638 VCVT{A,N,P,M}{.S32,.U32}.F32 Sd, Sm
13640 ARM encoding is in NV space.
13641 In Thumb mode, we must not be in an IT block.
13643 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,18) == BITS4(1,1,1,1)
13644 && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) {
13645 UInt bit_D = INSN(22,22);
13646 UInt fld_rm = INSN(17,16);
13647 UInt fld_Vd = INSN(15,12);
13648 Bool isF64 = INSN(8,8) == 1;
13649 Bool isU = INSN(7,7) == 0;
13650 UInt bit_M = INSN(5,5);
13651 UInt fld_Vm = INSN(3,0);
13653 UInt dd = (fld_Vd << 1) | bit_D;
13654 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13656 if (isT) {
13657 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13659 /* In ARM mode, this is statically unconditional. In Thumb mode,
13660 this must be dynamically unconditional, and we've SIGILLd if not.
13661 In either case we can create unconditional IR. */
13663 UChar c = '?';
13664 IRRoundingMode rm = Irrm_NEAREST;
13665 switch (fld_rm) {
13666 /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13667 kludge since it doesn't take into account the nearest-even vs
13668 nearest-away semantics. */
13669 case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
13670 case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
13671 case BITS2(1,0): c = 'p'; rm = Irrm_PosINF; break;
13672 case BITS2(1,1): c = 'm'; rm = Irrm_NegINF; break;
13673 default: vassert(0);
13676 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13677 IRTemp res = newTemp(Ity_I32);
13679 /* The arm back end doesn't support use of Iop_F32toI32U or
13680 Iop_F32toI32S, so for those cases we widen the F32 to F64
13681 and then follow the F64 route. */
13682 if (!isF64) {
13683 srcM = unop(Iop_F32toF64, srcM);
13685 assign(res, binop(isU ? Iop_F64toI32U : Iop_F64toI32S,
13686 mkU32((UInt)rm), srcM));
13688 llPutFReg(dd, unop(Iop_ReinterpI32asF32, mkexpr(res)));
13690 UChar rch = isF64 ? 'd' : 'f';
13691 DIP("vcvt%c.%s.%s %c%u, %c%u\n",
13692 c, isU ? "u32" : "s32", isF64 ? "f64" : "f32", 's', dd, rch, mm);
13693 return True;
13696 /* ----------- V{MAX,MIN}NM{.F64 d_d_d, .F32 s_s_s} ----------- */
13697 /* 31 27 22 21 19 15 11 8 7 6 5 4 3
13698 1111 11101 D 00 Vn Vd 101 1 N op M 0 Vm V{MIN,MAX}NM.F64 Dd, Dn, Dm
13699 1111 11101 D 00 Vn Vd 101 0 N op M 0 Vm V{MIN,MAX}NM.F32 Sd, Sn, Sm
13701 ARM encoding is in NV space.
13702 In Thumb mode, we must not be in an IT block.
13704 if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,20) == BITS2(0,0)
13705 && INSN(11,9) == BITS3(1,0,1) && INSN(4,4) == 0) {
13706 UInt bit_D = INSN(22,22);
13707 UInt fld_Vn = INSN(19,16);
13708 UInt fld_Vd = INSN(15,12);
13709 Bool isF64 = INSN(8,8) == 1;
13710 UInt bit_N = INSN(7,7);
13711 Bool isMAX = INSN(6,6) == 0;
13712 UInt bit_M = INSN(5,5);
13713 UInt fld_Vm = INSN(3,0);
13715 UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13716 UInt nn = isF64 ? ((bit_N << 4) | fld_Vn) : ((fld_Vn << 1) | bit_N);
13717 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13719 if (isT) {
13720 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13722 /* In ARM mode, this is statically unconditional. In Thumb mode,
13723 this must be dynamically unconditional, and we've SIGILLd if not.
13724 In either case we can create unconditional IR. */
13726 IROp op = isF64 ? (isMAX ? Iop_MaxNumF64 : Iop_MinNumF64)
13727 : (isMAX ? Iop_MaxNumF32 : Iop_MinNumF32);
13728 IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
13729 IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13730 IRExpr* res = binop(op, srcN, srcM);
13731 (isF64 ? llPutDReg : llPutFReg)(dd, res);
13733 UChar rch = isF64 ? 'd' : 'f';
13734 DIP("v%snm.%s %c%u, %c%u, %c%u\n",
13735 isMAX ? "max" : "min", isF64 ? "f64" : "f32",
13736 rch, dd, rch, nn, rch, mm);
13737 return True;
13740 /* ----------- VRINTX.F64.F64 d_d, VRINTX.F32.F32 s_s ----------- */
13741 /* 31 27 22 21 15 11 8 7 5 4 3
13742 T1: 1110 11101 D 110111 Vd 101 1 01 M 0 Vm VRINTX<c>.F64.F64 Dd, Dm
13743 A1: cond 11101 D 110111 Vd 101 1 01 M 0 Vm
13745    T1: 1110 11101 D 110111 Vd 101 0 01 M 0 Vm VRINTX<c>.F32.F32 Sd, Sm
13746 A1: cond 11101 D 110111 Vd 101 0 01 M 0 Vm
13748 Like VRINT{Z,R}{.F64.F64, .F32.F32} just above, this can be conditional.
13749 This produces the same code as the VRINTR case since we ignore the
13750 requirement to signal inexactness.
13752 if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
13753 && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,1)
13754 && INSN(11,9) == BITS3(1,0,1) && INSN(7,6) == BITS2(0,1)
13755 && INSN(4,4) == 0) {
13756 UInt bit_D = INSN(22,22);
13757 UInt fld_Vd = INSN(15,12);
13758 Bool isF64 = INSN(8,8) == 1;
13759 UInt bit_M = INSN(5,5);
13760 UInt fld_Vm = INSN(3,0);
13761 UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13762 UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13764 if (isT) vassert(condT != IRTemp_INVALID);
13765 IRType ty = isF64 ? Ity_F64 : Ity_F32;
13766 IRTemp src = newTemp(ty);
13767 IRTemp res = newTemp(ty);
13768 assign(src, (isF64 ? getDReg : getFReg)(mm));
13770 IRTemp rm = newTemp(Ity_I32);
13771 assign(rm, mkexpr(mk_get_IR_rounding_mode()));
13772 assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13773 mkexpr(rm), mkexpr(src)));
13774 (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT);
13776 UChar rch = isF64 ? 'd' : 'f';
13777 DIP("vrint%c.%s.%s %c%u, %c%u\n",
13778 'x',
13779 isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13780 return True;
13783 /* ----------- V{MAX,MIN}NM{.F32 d_d_d, .F32 q_q_q} ----------- */
13784 /* 31 27 22 21 20 19 15 11 7 6 5 4 3
13785 T1: 1111 11110 D op 0 Vn Vd 1111 N 1 M 1 Vm V{MIN,MAX}NM.F32 Qd,Qn,Qm
13786 A1: 1111 00110 D op 0 Vn Vd 1111 N 1 M 1 Vm
13788 T1: 1111 11110 D op 0 Vn Vd 1111 N 0 M 1 Vm V{MIN,MAX}NM.F32 Dd,Dn,Dm
13789 A1: 1111 00110 D op 0 Vn Vd 1111 N 0 M 1 Vm
13791 ARM encoding is in NV space.
13792 In Thumb mode, we must not be in an IT block.
13794 if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,0)
13795 : BITS9(1,1,1,1,0,0,1,1,0))
13796 && INSN(20,20) == 0 && INSN(11,8) == BITS4(1,1,1,1) && INSN(4,4) == 1) {
13797 UInt bit_D = INSN(22,22);
13798 Bool isMax = INSN(21,21) == 0;
13799 UInt fld_Vn = INSN(19,16);
13800 UInt fld_Vd = INSN(15,12);
13801 UInt bit_N = INSN(7,7);
13802 Bool isQ = INSN(6,6) == 1;
13803 UInt bit_M = INSN(5,5);
13804 UInt fld_Vm = INSN(3,0);
13806 /* dd, nn, mm are D-register numbers. */
13807 UInt dd = (bit_D << 4) | fld_Vd;
13808 UInt nn = (bit_N << 4) | fld_Vn;
13809 UInt mm = (bit_M << 4) | fld_Vm;
13811 if (! (isQ && ((dd & 1) == 1 || (nn & 1) == 1 || (mm & 1) == 1))) {
13812 /* Do this piecewise on f regs. This is a bit tricky
13813 though because we are dealing with the full 16 x Q == 32 x D
13814 register set, so the implied F reg numbers are 0 to 63. But
13815 ll{Get,Put}FReg only allow the 0 .. 31 as those are the only
13816 architected F regs. */
13817 UInt ddF = dd << 1;
13818 UInt nnF = nn << 1;
13819 UInt mmF = mm << 1;
13821 if (isT) {
13822 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13824 /* In ARM mode, this is statically unconditional. In Thumb mode,
13825 this must be dynamically unconditional, and we've SIGILLd if not.
13826 In either case we can create unconditional IR. */
13828 IROp op = isMax ? Iop_MaxNumF32 : Iop_MinNumF32;
13830 IRTemp r0 = newTemp(Ity_F32);
13831 IRTemp r1 = newTemp(Ity_F32);
13832 IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13833 IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13835 assign(r0, binop(op, llGetFReg_up_to_64(nnF+0),
13836 llGetFReg_up_to_64(mmF+0)));
13837 assign(r1, binop(op, llGetFReg_up_to_64(nnF+1),
13838 llGetFReg_up_to_64(mmF+1)));
13839 if (isQ) {
13840 assign(r2, binop(op, llGetFReg_up_to_64(nnF+2),
13841 llGetFReg_up_to_64(mmF+2)));
13842 assign(r3, binop(op, llGetFReg_up_to_64(nnF+3),
13843 llGetFReg_up_to_64(mmF+3)));
13845 llPutFReg_up_to_64(ddF+0, mkexpr(r0));
13846 llPutFReg_up_to_64(ddF+1, mkexpr(r1));
13847 if (isQ) {
13848 llPutFReg_up_to_64(ddF+2, mkexpr(r2));
13849 llPutFReg_up_to_64(ddF+3, mkexpr(r3));
13852 HChar rch = isQ ? 'q' : 'd';
13853 UInt sh = isQ ? 1 : 0;
13854 DIP("v%snm.f32 %c%u, %c%u, %c%u\n",
13855 isMax ? "max" : "min", rch,
13856 dd >> sh, rch, nn >> sh, rch, mm >> sh);
13857 return True;
13859 /* else fall through */
13862 /* ----------- VCVT{A,N,P,M}{.F32 d_d, .F32 q_q} ----------- */
13863 /* 31 27 22 21 15 11 9 7 6 5 4 3
13864 T1: 1111 11111 D 111011 Vd 00 rm op Q M 0 Vm
13865 A1: 1111 00111 D 111011 Vd 00 rm op Q M 0 Vm
13867 ARM encoding is in NV space.
13868 In Thumb mode, we must not be in an IT block.
13870 if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
13871 : BITS9(1,1,1,1,0,0,1,1,1))
13872 && INSN(21,16) == BITS6(1,1,1,0,1,1) && INSN(11,10) == BITS2(0,0)
13873 && INSN(4,4) == 0) {
13874 UInt bit_D = INSN(22,22);
13875 UInt fld_Vd = INSN(15,12);
13876 UInt fld_rm = INSN(9,8);
13877 Bool isU = INSN(7,7) == 1;
13878 Bool isQ = INSN(6,6) == 1;
13879 UInt bit_M = INSN(5,5);
13880 UInt fld_Vm = INSN(3,0);
13882 /* dd, nn, mm are D-register numbers. */
13883 UInt dd = (bit_D << 4) | fld_Vd;
13884 UInt mm = (bit_M << 4) | fld_Vm;
13886 if (! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
13887 /* Do this piecewise on f regs. */
13888 UInt ddF = dd << 1;
13889 UInt mmF = mm << 1;
13891 if (isT) {
13892 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13894 /* In ARM mode, this is statically unconditional. In Thumb mode,
13895 this must be dynamically unconditional, and we've SIGILLd if not.
13896 In either case we can create unconditional IR. */
13898 UChar cvtc = '?';
13899 IRRoundingMode rm = Irrm_NEAREST;
13900 switch (fld_rm) {
13901 /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13902 kludge since it doesn't take into account the nearest-even vs
13903 nearest-away semantics. */
13904 case BITS2(0,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
13905 case BITS2(0,1): cvtc = 'n'; rm = Irrm_NEAREST; break;
13906 case BITS2(1,0): cvtc = 'p'; rm = Irrm_PosINF; break;
13907 case BITS2(1,1): cvtc = 'm'; rm = Irrm_NegINF; break;
13908 default: vassert(0);
13911 IROp cvt = isU ? Iop_F64toI32U : Iop_F64toI32S;
13913 IRTemp r0 = newTemp(Ity_F32);
13914 IRTemp r1 = newTemp(Ity_F32);
13915 IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13916 IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13918 IRExpr* rmE = mkU32((UInt)rm);
13920 assign(r0, unop(Iop_ReinterpI32asF32,
13921 binop(cvt, rmE, unop(Iop_F32toF64,
13922 llGetFReg_up_to_64(mmF+0)))));
13923 assign(r1, unop(Iop_ReinterpI32asF32,
13924 binop(cvt, rmE, unop(Iop_F32toF64,
13925 llGetFReg_up_to_64(mmF+1)))));
13926 if (isQ) {
13927 assign(r2, unop(Iop_ReinterpI32asF32,
13928 binop(cvt, rmE, unop(Iop_F32toF64,
13929 llGetFReg_up_to_64(mmF+2)))));
13930 assign(r3, unop(Iop_ReinterpI32asF32,
13931 binop(cvt, rmE, unop(Iop_F32toF64,
13932 llGetFReg_up_to_64(mmF+3)))));
13935 llPutFReg_up_to_64(ddF+0, mkexpr(r0));
13936 llPutFReg_up_to_64(ddF+1, mkexpr(r1));
13937 if (isQ) {
13938 llPutFReg_up_to_64(ddF+2, mkexpr(r2));
13939 llPutFReg_up_to_64(ddF+3, mkexpr(r3));
13942 HChar rch = isQ ? 'q' : 'd';
13943 UInt sh = isQ ? 1 : 0;
13944 DIP("vcvt%c.%c32.f32 %c%u, %c%u\n",
13945 cvtc, isU ? 'u' : 's', rch, dd >> sh, rch, mm >> sh);
13946 return True;
13948 /* else fall through */
13951 /* ----------- VRINT{A,N,P,M,X,Z}{.F32 d_d, .F32 q_q} ----------- */
13952 /* 31 27 22 21 15 11 9 6 5 4 3
13953 T1: 1111 11111 D 111010 Vd 01 op Q M 0 Vm
13954 A1: 1111 00111 D 111010 Vd 01 op Q M 0 Vm
13956 ARM encoding is in NV space.
13957 In Thumb mode, we must not be in an IT block.
13959 if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
13960 : BITS9(1,1,1,1,0,0,1,1,1))
13961 && INSN(21,16) == BITS6(1,1,1,0,1,0) && INSN(11,10) == BITS2(0,1)
13962 && INSN(4,4) == 0) {
13963 UInt bit_D = INSN(22,22);
13964 UInt fld_Vd = INSN(15,12);
13965 UInt fld_op = INSN(9,7);
13966 Bool isQ = INSN(6,6) == 1;
13967 UInt bit_M = INSN(5,5);
13968 UInt fld_Vm = INSN(3,0);
13970 /* dd, nn, mm are D-register numbers. */
13971 UInt dd = (bit_D << 4) | fld_Vd;
13972 UInt mm = (bit_M << 4) | fld_Vm;
13974 if (! (fld_op == BITS3(1,0,0) || fld_op == BITS3(1,1,0))
13975 && ! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
13976 /* Do this piecewise on f regs. */
13977 UInt ddF = dd << 1;
13978 UInt mmF = mm << 1;
13980 if (isT) {
13981 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13983 /* In ARM mode, this is statically unconditional. In Thumb mode,
13984 this must be dynamically unconditional, and we've SIGILLd if not.
13985 In either case we can create unconditional IR. */
13987 UChar cvtc = '?';
13988 IRRoundingMode rm = Irrm_NEAREST;
13989 switch (fld_op) {
13990 /* Various kludges:
13991 - The use of NEAREST for both the 'a' and 'n' cases,
13992 since it doesn't take into account the nearest-even vs
13993 nearest-away semantics.
13994 - For the 'x' case, we don't signal inexactness.
13996 case BITS3(0,1,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
13997 case BITS3(0,0,0): cvtc = 'n'; rm = Irrm_NEAREST; break;
13998 case BITS3(1,1,1): cvtc = 'p'; rm = Irrm_PosINF; break;
13999 case BITS3(1,0,1): cvtc = 'm'; rm = Irrm_NegINF; break;
14000 case BITS3(0,1,1): cvtc = 'z'; rm = Irrm_ZERO; break;
14001 case BITS3(0,0,1): cvtc = 'x'; rm = Irrm_NEAREST; break;
14002 case BITS3(1,0,0):
14003 case BITS3(1,1,0):
14004 default: vassert(0);
14007 IRTemp r0 = newTemp(Ity_F32);
14008 IRTemp r1 = newTemp(Ity_F32);
14009 IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
14010 IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
14012 IRExpr* rmE = mkU32((UInt)rm);
14013 IROp rnd = Iop_RoundF32toInt;
14015 assign(r0, binop(rnd, rmE, llGetFReg_up_to_64(mmF+0)));
14016 assign(r1, binop(rnd, rmE, llGetFReg_up_to_64(mmF+1)));
14017 if (isQ) {
14018 assign(r2, binop(rnd, rmE, llGetFReg_up_to_64(mmF+2)));
14019 assign(r3, binop(rnd, rmE, llGetFReg_up_to_64(mmF+3)));
14022 llPutFReg_up_to_64(ddF+0, mkexpr(r0));
14023 llPutFReg_up_to_64(ddF+1, mkexpr(r1));
14024 if (isQ) {
14025 llPutFReg_up_to_64(ddF+2, mkexpr(r2));
14026 llPutFReg_up_to_64(ddF+3, mkexpr(r3));
14029 HChar rch = isQ ? 'q' : 'd';
14030 UInt sh = isQ ? 1 : 0;
14031 DIP("vrint%c.f32.f32 %c%u, %c%u\n",
14032 cvtc, rch, dd >> sh, rch, mm >> sh);
14033 return True;
14035 /* else fall through */
14038 /* ---------- Doesn't match anything. ---------- */
14039 return False;
14041 # undef INSN
14045 /*------------------------------------------------------------*/
14046 /*--- LDMxx/STMxx helper (both ARM and Thumb32) ---*/
14047 /*------------------------------------------------------------*/
14049 /* Generate IR for LDMxx and STMxx. This is complex. Assumes it's
14050 unconditional, so the caller must produce a jump-around before
14051 calling this, if the insn is to be conditional. Caller is
14052 responsible for all validation of parameters. For LDMxx, if PC is
14053 amongst the values loaded, caller is also responsible for
14054 generating the jump. */
14055 static void mk_ldm_stm ( Bool arm, /* True: ARM, False: Thumb */
14056 UInt rN, /* base reg */
14057 UInt bINC, /* 1: inc, 0: dec */
14058 UInt bBEFORE, /* 1: inc/dec before, 0: after */
14059 UInt bW, /* 1: writeback to Rn */
14060 UInt bL, /* 1: load, 0: store */
14061 UInt regList )
14063 Int i, r, m, nRegs;
14064 IRTemp jk = Ijk_Boring;
14066 /* Get hold of the old Rn value. We might need to write its value
14067 to memory during a store, and if it's also the writeback
14068 register then we need to get its value now. We can't treat it
14069 exactly like the other registers we're going to transfer,
14070 because for xxMDA and xxMDB writeback forms, the generated IR
14071 updates Rn in the guest state before any transfers take place.
14072 We have to do this as per comments below, in order that if Rn is
14073 the stack pointer then it always has a value is below or equal
14074 to any of the transfer addresses. Ick. */
14075 IRTemp oldRnT = newTemp(Ity_I32);
14076 assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
14078 IRTemp anchorT = newTemp(Ity_I32);
14079 /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
14080 ignore the bottom two bits of the address. However, Cortex-A8
14081 doesn't seem to care. Hence: */
14082 /* No .. don't force alignment .. */
14083 /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
14084 /* Instead, use the potentially misaligned address directly. */
14085 assign(anchorT, mkexpr(oldRnT));
14087 IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
14088 // bINC == 1: xxMIA, xxMIB
14089 // bINC == 0: xxMDA, xxMDB
14091 // For xxMDA and xxMDB, update Rn first if necessary. We have
14092 // to do this first so that, for the common idiom of the transfers
14093 // faulting because we're pushing stuff onto a stack and the stack
14094 // is growing down onto allocate-on-fault pages (as Valgrind simulates),
14095 // we need to have the SP up-to-date "covering" (pointing below) the
14096 // transfer area. For the same reason, if we are doing xxMIA or xxMIB,
14097 // do the transfer first, and then update rN afterwards.
14098 nRegs = 0;
14099 for (i = 0; i < 16; i++) {
14100 if ((regList & (1 << i)) != 0)
14101 nRegs++;
14103 if (bW == 1 && !bINC) {
14104 IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
14105 if (arm)
14106 putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
14107 else
14108 putIRegT( rN, e, IRTemp_INVALID );
14111 // Make up a list of the registers to transfer, and their offsets
14112 // in memory relative to the anchor. If the base reg (Rn) is part
14113 // of the transfer, then do it last for a load and first for a store.
14114 UInt xReg[16], xOff[16];
14115 Int nX = 0;
14116 m = 0;
14117 for (i = 0; i < 16; i++) {
14118 r = bINC ? i : (15-i);
14119 if (0 == (regList & (1<<r)))
14120 continue;
14121 if (bBEFORE)
14122 m++;
14123 /* paranoia: check we aren't transferring the writeback
14124 register during a load. Should be assured by decode-point
14125 check above. */
14126 if (bW == 1 && bL == 1)
14127 vassert(r != rN);
14129 xOff[nX] = 4 * m;
14130 xReg[nX] = r;
14131 nX++;
14133 if (!bBEFORE)
14134 m++;
14136 vassert(m == nRegs);
14137 vassert(nX == nRegs);
14138 vassert(nX <= 16);
14140 if (bW == 0 && (regList & (1<<rN)) != 0) {
14141 /* Non-writeback, and basereg is to be transferred. Do its
14142 transfer last for a load and first for a store. Requires
14143 reordering xOff/xReg. */
14144 if (0) {
14145 vex_printf("\nREG_LIST_PRE: (rN=%u)\n", rN);
14146 for (i = 0; i < nX; i++)
14147 vex_printf("reg %u off %u\n", xReg[i], xOff[i]);
14148 vex_printf("\n");
14151 vassert(nX > 0);
14152 for (i = 0; i < nX; i++) {
14153 if (xReg[i] == rN)
14154 break;
14156 vassert(i < nX); /* else we didn't find it! */
14157 UInt tReg = xReg[i];
14158 UInt tOff = xOff[i];
14159 if (bL == 1) {
14160 /* load; make this transfer happen last */
14161 if (i < nX-1) {
14162 for (m = i+1; m < nX; m++) {
14163 xReg[m-1] = xReg[m];
14164 xOff[m-1] = xOff[m];
14166 vassert(m == nX);
14167 xReg[m-1] = tReg;
14168 xOff[m-1] = tOff;
14170 } else {
14171 /* store; make this transfer happen first */
14172 if (i > 0) {
14173 for (m = i-1; m >= 0; m--) {
14174 xReg[m+1] = xReg[m];
14175 xOff[m+1] = xOff[m];
14177 vassert(m == -1);
14178 xReg[0] = tReg;
14179 xOff[0] = tOff;
14183 if (0) {
14184 vex_printf("REG_LIST_POST:\n");
14185 for (i = 0; i < nX; i++)
14186 vex_printf("reg %u off %u\n", xReg[i], xOff[i]);
14187 vex_printf("\n");
14191 /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
14192 register and PC in the register list is a return for purposes of branch
14193 prediction.
14194 The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
14195 to be counted in event 0x0E (Procedure return).*/
14196 if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
14197 jk = Ijk_Ret;
14200 /* Actually generate the transfers */
14201 for (i = 0; i < nX; i++) {
14202 r = xReg[i];
14203 if (bL == 1) {
14204 IRExpr* e = loadLE(Ity_I32,
14205 binop(opADDorSUB, mkexpr(anchorT),
14206 mkU32(xOff[i])));
14207 if (arm) {
14208 putIRegA( r, e, IRTemp_INVALID, jk );
14209 } else {
14210 // no: putIRegT( r, e, IRTemp_INVALID );
14211 // putIRegT refuses to write to R15. But that might happen.
14212 // Since this is uncond, and we need to be able to
14213 // write the PC, just use the low level put:
14214 llPutIReg( r, e );
14216 } else {
14217 /* if we're storing Rn, make sure we use the correct
14218 value, as per extensive comments above */
14219 storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
14220 r == rN ? mkexpr(oldRnT)
14221 : (arm ? getIRegA(r) : getIRegT(r) ) );
14225 // If we are doing xxMIA or xxMIB,
14226 // do the transfer first, and then update rN afterwards.
14227 if (bW == 1 && bINC) {
14228 IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
14229 if (arm)
14230 putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
14231 else
14232 putIRegT( rN, e, IRTemp_INVALID );
14237 /*------------------------------------------------------------*/
14238 /*--- VFP (CP 10 and 11) instructions ---*/
14239 /*------------------------------------------------------------*/
14241 /* Both ARM and Thumb */
14243 /* Translate a CP10 or CP11 instruction. If successful, returns
14244 True and *dres may or may not be updated. If failure, returns
14245 False and doesn't change *dres nor create any IR.
14247 The ARM and Thumb encodings are identical for the low 28 bits of
14248 the insn (yay!) and that's what the caller must supply, iow, imm28
14249 has the top 4 bits masked out. Caller is responsible for
14250 determining whether the masked-out bits are valid for a CP10/11
14251 insn. The rules for the top 4 bits are:
14253 ARM: 0000 to 1110 allowed, and this is the gating condition.
14254 1111 (NV) is not allowed.
14256 Thumb: must be 1110. The gating condition is taken from
14257 ITSTATE in the normal way.
14259 Conditionalisation:
14261 Caller must supply an IRTemp 'condT' holding the gating condition,
14262 or IRTemp_INVALID indicating the insn is always executed.
14264 Caller must also supply an ARMCondcode 'cond'. This is only used
14265 for debug printing, no other purpose. For ARM, this is simply the
14266 top 4 bits of the original instruction. For Thumb, the condition
14267 is not (really) known until run time, and so ARMCondAL should be
14268 passed, only so that printing of these instructions does not show
14269 any condition.
14271 Finally, the caller must indicate whether this occurs in ARM or
14272 Thumb code.
14274 static Bool decode_CP10_CP11_instruction (
14275 /*MOD*/DisResult* dres,
14276 UInt insn28,
14277 IRTemp condT,
14278 ARMCondcode conq,
14279 Bool isT
14282 # define INSN(_bMax,_bMin) SLICE_UInt(insn28, (_bMax), (_bMin))
14284 vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
14286 if (isT) {
14287 vassert(conq == ARMCondAL);
14288 } else {
14289 vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
14292 /* ----------------------------------------------------------- */
14293 /* -- VFP instructions -- double precision (mostly) -- */
14294 /* ----------------------------------------------------------- */
14296 /* --------------------- fldmx, fstmx --------------------- */
14298 31 27 23 19 15 11 7 0
14299 P U WL
14300 C4-100, C5-26 1 FSTMX cond 1100 1000 Rn Dd 1011 offset
14301 C4-100, C5-28 2 FSTMIAX cond 1100 1010 Rn Dd 1011 offset
14302 C4-100, C5-30 3 FSTMDBX cond 1101 0010 Rn Dd 1011 offset
14304 C4-42, C5-26 1 FLDMX cond 1100 1001 Rn Dd 1011 offset
14305 C4-42, C5-28 2 FLDMIAX cond 1100 1011 Rn Dd 1011 offset
14306 C4-42, C5-30 3 FLDMDBX cond 1101 0011 Rn Dd 1011 offset
14308 Regs transferred: Dd .. D(d + (offset-3)/2)
14309 offset must be odd, must not imply a reg > 15
14310 IA/DB: Rn is changed by (4 + 8 x # regs transferred)
14312 case coding:
14313 1 at-Rn (access at Rn)
14314 2 ia-Rn (access at Rn, then Rn += 4+8n)
14315 3 db-Rn (Rn -= 4+8n, then access at Rn)
14317 if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
14318 && INSN(11,8) == BITS4(1,0,1,1)) {
14319 UInt bP = (insn28 >> 24) & 1;
14320 UInt bU = (insn28 >> 23) & 1;
14321 UInt bW = (insn28 >> 21) & 1;
14322 UInt bL = (insn28 >> 20) & 1;
14323 UInt offset = (insn28 >> 0) & 0xFF;
14324 UInt rN = INSN(19,16);
14325 UInt dD = (INSN(22,22) << 4) | INSN(15,12);
14326 UInt nRegs = (offset - 1) / 2;
14327 UInt summary = 0;
14328 Int i;
14330 /**/ if (bP == 0 && bU == 1 && bW == 0) {
14331 summary = 1;
14333 else if (bP == 0 && bU == 1 && bW == 1) {
14334 summary = 2;
14336 else if (bP == 1 && bU == 0 && bW == 1) {
14337 summary = 3;
14339 else goto after_vfp_fldmx_fstmx;
14341 /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
14342 if (rN == 15 && (summary == 2 || summary == 3 || isT))
14343 goto after_vfp_fldmx_fstmx;
14345 /* offset must be odd, and specify at least one register */
14346 if (0 == (offset & 1) || offset < 3)
14347 goto after_vfp_fldmx_fstmx;
14349 /* can't transfer regs after D15 */
14350 if (dD + nRegs - 1 >= 32)
14351 goto after_vfp_fldmx_fstmx;
14353 /* Now, we can't do a conditional load or store, since that very
14354 likely will generate an exception. So we have to take a side
14355 exit at this point if the condition is false. */
14356 if (condT != IRTemp_INVALID) {
14357 if (isT)
14358 mk_skip_over_T32_if_cond_is_false( condT );
14359 else
14360 mk_skip_over_A32_if_cond_is_false( condT );
14361 condT = IRTemp_INVALID;
14363 /* Ok, now we're unconditional. Do the load or store. */
14365 /* get the old Rn value */
14366 IRTemp rnT = newTemp(Ity_I32);
14367 assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
14368 rN == 15));
14370 /* make a new value for Rn, post-insn */
14371 IRTemp rnTnew = IRTemp_INVALID;
14372 if (summary == 2 || summary == 3) {
14373 rnTnew = newTemp(Ity_I32);
14374 assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
14375 mkexpr(rnT),
14376 mkU32(4 + 8 * nRegs)));
14379 /* decide on the base transfer address */
14380 IRTemp taT = newTemp(Ity_I32);
14381 assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
14383 /* update Rn if necessary -- in case 3, we're moving it down, so
14384 update before any memory reference, in order to keep Memcheck
14385 and V's stack-extending logic (on linux) happy */
14386 if (summary == 3) {
14387 if (isT)
14388 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14389 else
14390 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14393 /* generate the transfers */
14394 for (i = 0; i < nRegs; i++) {
14395 IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
14396 if (bL) {
14397 putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
14398 } else {
14399 storeLE(addr, getDReg(dD + i));
14403 /* update Rn if necessary -- in case 2, we're moving it up, so
14404 update after any memory reference, in order to keep Memcheck
14405 and V's stack-extending logic (on linux) happy */
14406 if (summary == 2) {
14407 if (isT)
14408 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14409 else
14410 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14413 const HChar* nm = bL==1 ? "ld" : "st";
14414 switch (summary) {
14415 case 1: DIP("f%smx%s r%u, {d%u-d%u}\n",
14416 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14417 break;
14418 case 2: DIP("f%smiax%s r%u!, {d%u-d%u}\n",
14419 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14420 break;
14421 case 3: DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
14422 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14423 break;
14424 default: vassert(0);
14427 goto decode_success_vfp;
14428 /* FIXME alignment constraints? */
14431 after_vfp_fldmx_fstmx:
14433 /* --------------------- fldmd, fstmd --------------------- */
14435 31 27 23 19 15 11 7 0
14436 P U WL
14437 C4-96, C5-26 1 FSTMD cond 1100 1000 Rn Dd 1011 offset
14438 C4-96, C5-28 2 FSTMDIA cond 1100 1010 Rn Dd 1011 offset
14439 C4-96, C5-30 3 FSTMDDB cond 1101 0010 Rn Dd 1011 offset
14441 C4-38, C5-26 1 FLDMD cond 1100 1001 Rn Dd 1011 offset
14442 C4-38, C5-28 2 FLDMIAD cond 1100 1011 Rn Dd 1011 offset
14443 C4-38, C5-30 3 FLDMDBD cond 1101 0011 Rn Dd 1011 offset
14445 Regs transferred: Dd .. D(d + (offset-2)/2)
14446 offset must be even, must not imply a reg > 15
14447 IA/DB: Rn is changed by (8 x # regs transferred)
14449 case coding:
14450 1 at-Rn (access at Rn)
14451 2 ia-Rn (access at Rn, then Rn += 8n)
14452 3 db-Rn (Rn -= 8n, then access at Rn)
14454 if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
14455 && INSN(11,8) == BITS4(1,0,1,1)) {
14456 UInt bP = (insn28 >> 24) & 1;
14457 UInt bU = (insn28 >> 23) & 1;
14458 UInt bW = (insn28 >> 21) & 1;
14459 UInt bL = (insn28 >> 20) & 1;
14460 UInt offset = (insn28 >> 0) & 0xFF;
14461 UInt rN = INSN(19,16);
14462 UInt dD = (INSN(22,22) << 4) | INSN(15,12);
14463 UInt nRegs = offset / 2;
14464 UInt summary = 0;
14465 Int i;
14467 /**/ if (bP == 0 && bU == 1 && bW == 0) {
14468 summary = 1;
14470 else if (bP == 0 && bU == 1 && bW == 1) {
14471 summary = 2;
14473 else if (bP == 1 && bU == 0 && bW == 1) {
14474 summary = 3;
14476 else goto after_vfp_fldmd_fstmd;
14478 /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
14479 if (rN == 15 && (summary == 2 || summary == 3 || isT))
14480 goto after_vfp_fldmd_fstmd;
14482 /* offset must be even, and specify at least one register */
14483 if (1 == (offset & 1) || offset < 2)
14484 goto after_vfp_fldmd_fstmd;
14486 /* can't transfer regs after D15 */
14487 if (dD + nRegs - 1 >= 32)
14488 goto after_vfp_fldmd_fstmd;
14490 /* Now, we can't do a conditional load or store, since that very
14491 likely will generate an exception. So we have to take a side
14492 exit at this point if the condition is false. */
14493 if (condT != IRTemp_INVALID) {
14494 if (isT)
14495 mk_skip_over_T32_if_cond_is_false( condT );
14496 else
14497 mk_skip_over_A32_if_cond_is_false( condT );
14498 condT = IRTemp_INVALID;
14500 /* Ok, now we're unconditional. Do the load or store. */
14502 /* get the old Rn value */
14503 IRTemp rnT = newTemp(Ity_I32);
14504 assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
14505 rN == 15));
14507 /* make a new value for Rn, post-insn */
14508 IRTemp rnTnew = IRTemp_INVALID;
14509 if (summary == 2 || summary == 3) {
14510 rnTnew = newTemp(Ity_I32);
14511 assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
14512 mkexpr(rnT),
14513 mkU32(8 * nRegs)));
14516 /* decide on the base transfer address */
14517 IRTemp taT = newTemp(Ity_I32);
14518 assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
14520 /* update Rn if necessary -- in case 3, we're moving it down, so
14521 update before any memory reference, in order to keep Memcheck
14522 and V's stack-extending logic (on linux) happy */
14523 if (summary == 3) {
14524 if (isT)
14525 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14526 else
14527 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14530 /* generate the transfers */
14531 for (i = 0; i < nRegs; i++) {
14532 IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
14533 if (bL) {
14534 putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
14535 } else {
14536 storeLE(addr, getDReg(dD + i));
14540 /* update Rn if necessary -- in case 2, we're moving it up, so
14541 update after any memory reference, in order to keep Memcheck
14542 and V's stack-extending logic (on linux) happy */
14543 if (summary == 2) {
14544 if (isT)
14545 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14546 else
14547 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14550 const HChar* nm = bL==1 ? "ld" : "st";
14551 switch (summary) {
14552 case 1: DIP("f%smd%s r%u, {d%u-d%u}\n",
14553 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14554 break;
14555 case 2: DIP("f%smiad%s r%u!, {d%u-d%u}\n",
14556 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14557 break;
14558 case 3: DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
14559 nm, nCC(conq), rN, dD, dD + nRegs - 1);
14560 break;
14561 default: vassert(0);
14564 goto decode_success_vfp;
14565 /* FIXME alignment constraints? */
14568 after_vfp_fldmd_fstmd:
14570 /* ------------------- fmrx, fmxr ------------------- */
14571 if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
14572 && BITS4(1,0,1,0) == INSN(11,8)
14573 && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
14574 UInt rD = INSN(15,12);
14575 UInt reg = INSN(19,16);
14576 if (reg == BITS4(0,0,0,1)) {
14577 if (rD == 15) {
14578 IRTemp nzcvT = newTemp(Ity_I32);
14579 /* When rD is 15, we are copying the top 4 bits of FPSCR
14580 into CPSR. That is, set the flags thunk to COPY and
14581 install FPSCR[31:28] as the value to copy. */
14582 assign(nzcvT, binop(Iop_And32,
14583 IRExpr_Get(OFFB_FPSCR, Ity_I32),
14584 mkU32(0xF0000000)));
14585 setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
14586 DIP("fmstat%s\n", nCC(conq));
14587 } else {
14588 /* Otherwise, merely transfer FPSCR to r0 .. r14. */
14589 IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
14590 if (isT)
14591 putIRegT(rD, e, condT);
14592 else
14593 putIRegA(rD, e, condT, Ijk_Boring);
14594 DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
14596 goto decode_success_vfp;
14598 /* fall through */
14601 if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
14602 && BITS4(1,0,1,0) == INSN(11,8)
14603 && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
14604 UInt rD = INSN(15,12);
14605 UInt reg = INSN(19,16);
14606 if (reg == BITS4(0,0,0,1)) {
14607 putMiscReg32(OFFB_FPSCR,
14608 isT ? getIRegT(rD) : getIRegA(rD), condT);
14609 DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
14610 goto decode_success_vfp;
14612 /* fall through */
14615 /* --------------------- vmov --------------------- */
14616 // VMOV dM, rD, rN
14617 if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
14618 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
14619 UInt rD = INSN(15,12); /* lo32 */
14620 UInt rN = INSN(19,16); /* hi32 */
14621 if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
14622 /* fall through */
14623 } else {
14624 putDReg(dM,
14625 unop(Iop_ReinterpI64asF64,
14626 binop(Iop_32HLto64,
14627 isT ? getIRegT(rN) : getIRegA(rN),
14628 isT ? getIRegT(rD) : getIRegA(rD))),
14629 condT);
14630 DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
14631 goto decode_success_vfp;
14633 /* fall through */
14636 // VMOV rD, rN, dM
14637 if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
14638 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
14639 UInt rD = INSN(15,12); /* lo32 */
14640 UInt rN = INSN(19,16); /* hi32 */
14641 if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
14642 || rD == rN) {
14643 /* fall through */
14644 } else {
14645 IRTemp i64 = newTemp(Ity_I64);
14646 assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
14647 IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
14648 IRExpr* lo32 = unop(Iop_64to32, mkexpr(i64));
14649 if (isT) {
14650 putIRegT(rN, hi32, condT);
14651 putIRegT(rD, lo32, condT);
14652 } else {
14653 putIRegA(rN, hi32, condT, Ijk_Boring);
14654 putIRegA(rD, lo32, condT, Ijk_Boring);
14656 DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
14657 goto decode_success_vfp;
14659 /* fall through */
14662 // VMOV sD, sD+1, rN, rM
14663 if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
14664 UInt sD = (INSN(3,0) << 1) | INSN(5,5);
14665 UInt rN = INSN(15,12);
14666 UInt rM = INSN(19,16);
14667 if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
14668 || sD == 31) {
14669 /* fall through */
14670 } else {
14671 putFReg(sD,
14672 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
14673 condT);
14674 putFReg(sD+1,
14675 unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
14676 condT);
14677 DIP("vmov%s, s%u, s%u, r%u, r%u\n",
14678 nCC(conq), sD, sD + 1, rN, rM);
14679 goto decode_success_vfp;
14683 // VMOV rN, rM, sD, sD+1
14684 if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
14685 UInt sD = (INSN(3,0) << 1) | INSN(5,5);
14686 UInt rN = INSN(15,12);
14687 UInt rM = INSN(19,16);
14688 if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
14689 || sD == 31 || rN == rM) {
14690 /* fall through */
14691 } else {
14692 IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
14693 IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
14694 if (isT) {
14695 putIRegT(rN, res0, condT);
14696 putIRegT(rM, res1, condT);
14697 } else {
14698 putIRegA(rN, res0, condT, Ijk_Boring);
14699 putIRegA(rM, res1, condT, Ijk_Boring);
14701 DIP("vmov%s, r%u, r%u, s%u, s%u\n",
14702 nCC(conq), rN, rM, sD, sD + 1);
14703 goto decode_success_vfp;
14707 // VMOV rD[x], rT (ARM core register to scalar)
14708 if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
14709 UInt rD = (INSN(7,7) << 4) | INSN(19,16);
14710 UInt rT = INSN(15,12);
14711 UInt opc = (INSN(22,21) << 2) | INSN(6,5);
14712 UInt index;
14713 if (rT == 15 || (isT && rT == 13)) {
14714 /* fall through */
14715 } else {
14716 if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
14717 index = opc & 7;
14718 putDRegI64(rD, triop(Iop_SetElem8x8,
14719 getDRegI64(rD),
14720 mkU8(index),
14721 unop(Iop_32to8,
14722 isT ? getIRegT(rT) : getIRegA(rT))),
14723 condT);
14724 DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14725 goto decode_success_vfp;
14727 else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
14728 index = (opc >> 1) & 3;
14729 putDRegI64(rD, triop(Iop_SetElem16x4,
14730 getDRegI64(rD),
14731 mkU8(index),
14732 unop(Iop_32to16,
14733 isT ? getIRegT(rT) : getIRegA(rT))),
14734 condT);
14735 DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14736 goto decode_success_vfp;
14738 else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
14739 index = (opc >> 2) & 1;
14740 putDRegI64(rD, triop(Iop_SetElem32x2,
14741 getDRegI64(rD),
14742 mkU8(index),
14743 isT ? getIRegT(rT) : getIRegA(rT)),
14744 condT);
14745 DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14746 goto decode_success_vfp;
14747 } else {
14748 /* fall through */
14753 // VMOV (scalar to ARM core register)
14754 // VMOV rT, rD[x]
14755 if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
14756 UInt rN = (INSN(7,7) << 4) | INSN(19,16);
14757 UInt rT = INSN(15,12);
14758 UInt U = INSN(23,23);
14759 UInt opc = (INSN(22,21) << 2) | INSN(6,5);
14760 UInt index;
14761 if (rT == 15 || (isT && rT == 13)) {
14762 /* fall through */
14763 } else {
14764 if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
14765 index = opc & 7;
14766 IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
14767 binop(Iop_GetElem8x8,
14768 getDRegI64(rN),
14769 mkU8(index)));
14770 if (isT)
14771 putIRegT(rT, e, condT);
14772 else
14773 putIRegA(rT, e, condT, Ijk_Boring);
14774 DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
14775 rT, rN, index);
14776 goto decode_success_vfp;
14778 else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
14779 index = (opc >> 1) & 3;
14780 IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
14781 binop(Iop_GetElem16x4,
14782 getDRegI64(rN),
14783 mkU8(index)));
14784 if (isT)
14785 putIRegT(rT, e, condT);
14786 else
14787 putIRegA(rT, e, condT, Ijk_Boring);
14788 DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
14789 rT, rN, index);
14790 goto decode_success_vfp;
14792 else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
14793 index = (opc >> 2) & 1;
14794 IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
14795 if (isT)
14796 putIRegT(rT, e, condT);
14797 else
14798 putIRegA(rT, e, condT, Ijk_Boring);
14799 DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
14800 goto decode_success_vfp;
14801 } else {
14802 /* fall through */
14807 // VMOV.F32 sD, #imm
14808 // FCONSTS sD, #imm
14809 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14810 && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
14811 UInt rD = (INSN(15,12) << 1) | INSN(22,22);
14812 UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
14813 UInt b = (imm8 >> 6) & 1;
14814 UInt imm;
14815 imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
14816 | ((imm8 & 0x1f) << 3);
14817 imm <<= 16;
14818 putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
14819 DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
14820 goto decode_success_vfp;
14823 // VMOV.F64 dD, #imm
14824 // FCONSTD dD, #imm
14825 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14826 && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
14827 UInt rD = INSN(15,12) | (INSN(22,22) << 4);
14828 UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
14829 UInt b = (imm8 >> 6) & 1;
14830 ULong imm;
14831 imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
14832 | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
14833 imm <<= 48;
14834 putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
14835 DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
14836 goto decode_success_vfp;
14839 /* ---------------------- vdup ------------------------- */
14840 // VDUP dD, rT
14841 // VDUP qD, rT
14842 if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
14843 && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
14844 UInt rD = (INSN(7,7) << 4) | INSN(19,16);
14845 UInt rT = INSN(15,12);
14846 UInt Q = INSN(21,21);
14847 UInt size = (INSN(22,22) << 1) | INSN(5,5);
14848 if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
14849 /* fall through */
14850 } else {
14851 IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
14852 if (Q) {
14853 rD >>= 1;
14854 switch (size) {
14855 case 0:
14856 putQReg(rD, unop(Iop_Dup32x4, e), condT);
14857 break;
14858 case 1:
14859 putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
14860 condT);
14861 break;
14862 case 2:
14863 putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
14864 condT);
14865 break;
14866 default:
14867 vassert(0);
14869 DIP("vdup.%d q%u, r%u\n", 32 / (1<<size), rD, rT);
14870 } else {
14871 switch (size) {
14872 case 0:
14873 putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
14874 break;
14875 case 1:
14876 putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
14877 condT);
14878 break;
14879 case 2:
14880 putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
14881 condT);
14882 break;
14883 default:
14884 vassert(0);
14886 DIP("vdup.%d d%u, r%u\n", 32 / (1<<size), rD, rT);
14888 goto decode_success_vfp;
14892 /* --------------------- f{ld,st}d --------------------- */
14893 // FLDD, FSTD
14894 if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
14895 && BITS4(1,0,1,1) == INSN(11,8)) {
14896 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
14897 UInt rN = INSN(19,16);
14898 UInt offset = (insn28 & 0xFF) << 2;
14899 UInt bU = (insn28 >> 23) & 1; /* 1: +offset 0: -offset */
14900 UInt bL = (insn28 >> 20) & 1; /* 1: load 0: store */
14901 /* make unconditional */
14902 if (condT != IRTemp_INVALID) {
14903 if (isT)
14904 mk_skip_over_T32_if_cond_is_false( condT );
14905 else
14906 mk_skip_over_A32_if_cond_is_false( condT );
14907 condT = IRTemp_INVALID;
14909 IRTemp ea = newTemp(Ity_I32);
14910 assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
14911 align4if(isT ? getIRegT(rN) : getIRegA(rN),
14912 rN == 15),
14913 mkU32(offset)));
14914 if (bL) {
14915 putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
14916 } else {
14917 storeLE(mkexpr(ea), getDReg(dD));
14919 DIP("f%sd%s d%u, [r%u, %c#%u]\n",
14920 bL ? "ld" : "st", nCC(conq), dD, rN,
14921 bU ? '+' : '-', offset);
14922 goto decode_success_vfp;
14925 /* --------------------- dp insns (D) --------------------- */
14926 if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
14927 && BITS4(1,0,1,1) == INSN(11,8)
14928 && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
14929 UInt dM = INSN(3,0) | (INSN(5,5) << 4); /* argR */
14930 UInt dD = INSN(15,12) | (INSN(22,22) << 4); /* dst/acc */
14931 UInt dN = INSN(19,16) | (INSN(7,7) << 4); /* argL */
14932 UInt bP = (insn28 >> 23) & 1;
14933 UInt bQ = (insn28 >> 21) & 1;
14934 UInt bR = (insn28 >> 20) & 1;
14935 UInt bS = (insn28 >> 6) & 1;
14936 UInt opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
14937 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
14938 switch (opc) {
14939 case BITS4(0,0,0,0): /* MAC: d + n * m */
14940 putDReg(dD, triop(Iop_AddF64, rm,
14941 getDReg(dD),
14942 triop(Iop_MulF64, rm, getDReg(dN),
14943 getDReg(dM))),
14944 condT);
14945 DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14946 goto decode_success_vfp;
14947 case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
14948 putDReg(dD, triop(Iop_AddF64, rm,
14949 getDReg(dD),
14950 unop(Iop_NegF64,
14951 triop(Iop_MulF64, rm, getDReg(dN),
14952 getDReg(dM)))),
14953 condT);
14954 DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14955 goto decode_success_vfp;
14956 case BITS4(0,0,1,0): /* MSC: - d + n * m */
14957 putDReg(dD, triop(Iop_AddF64, rm,
14958 unop(Iop_NegF64, getDReg(dD)),
14959 triop(Iop_MulF64, rm, getDReg(dN),
14960 getDReg(dM))),
14961 condT);
14962 DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14963 goto decode_success_vfp;
14964 case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
14965 putDReg(dD, triop(Iop_AddF64, rm,
14966 unop(Iop_NegF64, getDReg(dD)),
14967 unop(Iop_NegF64,
14968 triop(Iop_MulF64, rm, getDReg(dN),
14969 getDReg(dM)))),
14970 condT);
14971 DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14972 goto decode_success_vfp;
14973 case BITS4(0,1,0,0): /* MUL: n * m */
14974 putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
14975 condT);
14976 DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14977 goto decode_success_vfp;
14978 case BITS4(0,1,0,1): /* NMUL: - n * m */
14979 putDReg(dD, unop(Iop_NegF64,
14980 triop(Iop_MulF64, rm, getDReg(dN),
14981 getDReg(dM))),
14982 condT);
14983 DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14984 goto decode_success_vfp;
14985 case BITS4(0,1,1,0): /* ADD: n + m */
14986 putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
14987 condT);
14988 DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14989 goto decode_success_vfp;
14990 case BITS4(0,1,1,1): /* SUB: n - m */
14991 putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
14992 condT);
14993 DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14994 goto decode_success_vfp;
14995 case BITS4(1,0,0,0): /* DIV: n / m */
14996 putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
14997 condT);
14998 DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14999 goto decode_success_vfp;
15000 case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
15001 /* XXXROUNDINGFIXME look up ARM reference for fused
15002 multiply-add rounding */
15003 putDReg(dD, triop(Iop_AddF64, rm,
15004 unop(Iop_NegF64, getDReg(dD)),
15005 triop(Iop_MulF64, rm,
15006 getDReg(dN),
15007 getDReg(dM))),
15008 condT);
15009 DIP("vfnmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15010 goto decode_success_vfp;
15011 case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
15012 /* XXXROUNDINGFIXME look up ARM reference for fused
15013 multiply-add rounding */
15014 putDReg(dD, triop(Iop_AddF64, rm,
15015 unop(Iop_NegF64, getDReg(dD)),
15016 triop(Iop_MulF64, rm,
15017 unop(Iop_NegF64, getDReg(dN)),
15018 getDReg(dM))),
15019 condT);
15020 DIP("vfnmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15021 goto decode_success_vfp;
15022 case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
15023 /* XXXROUNDINGFIXME look up ARM reference for fused
15024 multiply-add rounding */
15025 putDReg(dD, triop(Iop_AddF64, rm,
15026 getDReg(dD),
15027 triop(Iop_MulF64, rm, getDReg(dN),
15028 getDReg(dM))),
15029 condT);
15030 DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15031 goto decode_success_vfp;
15032 case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
15033 /* XXXROUNDINGFIXME look up ARM reference for fused
15034 multiply-add rounding */
15035 putDReg(dD, triop(Iop_AddF64, rm,
15036 getDReg(dD),
15037 triop(Iop_MulF64, rm,
15038 unop(Iop_NegF64, getDReg(dN)),
15039 getDReg(dM))),
15040 condT);
15041 DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15042 goto decode_success_vfp;
15043 default:
15044 break;
15048 /* --------------------- compares (D) --------------------- */
15049 /* 31 27 23 19 15 11 7 3
15050 28 24 20 16 12 8 4 0
15051 FCMPD cond 1110 1D11 0100 Dd 1011 0100 Dm
15052 FCMPED cond 1110 1D11 0100 Dd 1011 1100 Dm
15053 FCMPZD cond 1110 1D11 0101 Dd 1011 0100 0000
15054 FCMPZED cond 1110 1D11 0101 Dd 1011 1100 0000
15057 Z=0 Compare Dd vs Dm and set FPSCR 31:28 accordingly
15058 Z=1 Compare Dd vs zero
15060 N=1 generates Invalid Operation exn if either arg is any kind of NaN
15061 N=0 generates Invalid Operation exn if either arg is a signalling NaN
15062 (Not that we pay any attention to N here)
15064 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15065 && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15066 && BITS4(1,0,1,1) == INSN(11,8)
15067 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15068 UInt bZ = (insn28 >> 16) & 1;
15069 UInt bN = (insn28 >> 7) & 1;
15070 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15071 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15072 if (bZ && INSN(3,0) != 0) {
15073 /* does not decode; fall through */
15074 } else {
15075 IRTemp argL = newTemp(Ity_F64);
15076 IRTemp argR = newTemp(Ity_F64);
15077 IRTemp irRes = newTemp(Ity_I32);
15078 assign(argL, getDReg(dD));
15079 assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
15080 assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
15082 IRTemp nzcv = IRTemp_INVALID;
15083 IRTemp oldFPSCR = newTemp(Ity_I32);
15084 IRTemp newFPSCR = newTemp(Ity_I32);
15086 /* This is where the fun starts. We have to convert 'irRes'
15087 from an IR-convention return result (IRCmpF64Result) to an
15088 ARM-encoded (N,Z,C,V) group. The final result is in the
15089 bottom 4 bits of 'nzcv'. */
15090 /* Map compare result from IR to ARM(nzcv) */
15092 FP cmp result | IR | ARM(nzcv)
15093 --------------------------------
15094 UN 0x45 0011
15095 LT 0x01 1000
15096 GT 0x00 0010
15097 EQ 0x40 0110
15099 nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15101 /* And update FPSCR accordingly */
15102 assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
15103 assign(newFPSCR,
15104 binop(Iop_Or32,
15105 binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
15106 binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
15108 putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
15110 if (bZ) {
15111 DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
15112 } else {
15113 DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
15115 goto decode_success_vfp;
15117 /* fall through */
15120 /* --------------------- unary (D) --------------------- */
15121 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15122 && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15123 && BITS4(1,0,1,1) == INSN(11,8)
15124 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15125 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15126 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15127 UInt b16 = (insn28 >> 16) & 1;
15128 UInt b7 = (insn28 >> 7) & 1;
15129 /**/ if (b16 == 0 && b7 == 0) {
15130 // FCPYD
15131 putDReg(dD, getDReg(dM), condT);
15132 DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
15133 goto decode_success_vfp;
15135 else if (b16 == 0 && b7 == 1) {
15136 // FABSD
15137 putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
15138 DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
15139 goto decode_success_vfp;
15141 else if (b16 == 1 && b7 == 0) {
15142 // FNEGD
15143 putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
15144 DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
15145 goto decode_success_vfp;
15147 else if (b16 == 1 && b7 == 1) {
15148 // FSQRTD
15149 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15150 putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
15151 DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
15152 goto decode_success_vfp;
15154 else
15155 vassert(0);
15157 /* fall through */
15160 /* ----------------- I <-> D conversions ----------------- */
15162 // F{S,U}ITOD dD, fM
15163 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15164 && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
15165 && BITS4(1,0,1,1) == INSN(11,8)
15166 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15167 UInt bM = (insn28 >> 5) & 1;
15168 UInt fM = (INSN(3,0) << 1) | bM;
15169 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15170 UInt syned = (insn28 >> 7) & 1;
15171 if (syned) {
15172 // FSITOD
15173 putDReg(dD, unop(Iop_I32StoF64,
15174 unop(Iop_ReinterpF32asI32, getFReg(fM))),
15175 condT);
15176 DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
15177 } else {
15178 // FUITOD
15179 putDReg(dD, unop(Iop_I32UtoF64,
15180 unop(Iop_ReinterpF32asI32, getFReg(fM))),
15181 condT);
15182 DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
15184 goto decode_success_vfp;
15187 // FTO{S,U}ID fD, dM
15188 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15189 && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15190 && BITS4(1,0,1,1) == INSN(11,8)
15191 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15192 UInt bD = (insn28 >> 22) & 1;
15193 UInt fD = (INSN(15,12) << 1) | bD;
15194 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15195 UInt bZ = (insn28 >> 7) & 1;
15196 UInt syned = (insn28 >> 16) & 1;
15197 IRTemp rmode = newTemp(Ity_I32);
15198 assign(rmode, bZ ? mkU32(Irrm_ZERO)
15199 : mkexpr(mk_get_IR_rounding_mode()));
15200 if (syned) {
15201 // FTOSID
15202 putFReg(fD, unop(Iop_ReinterpI32asF32,
15203 binop(Iop_F64toI32S, mkexpr(rmode),
15204 getDReg(dM))),
15205 condT);
15206 DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
15207 nCC(conq), fD, dM);
15208 } else {
15209 // FTOUID
15210 putFReg(fD, unop(Iop_ReinterpI32asF32,
15211 binop(Iop_F64toI32U, mkexpr(rmode),
15212 getDReg(dM))),
15213 condT);
15214 DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
15215 nCC(conq), fD, dM);
15217 goto decode_success_vfp;
15220 /* ----------------------------------------------------------- */
15221 /* -- VFP instructions -- single precision -- */
15222 /* ----------------------------------------------------------- */
15224 /* --------------------- fldms, fstms --------------------- */
15226 31 27 23 19 15 11 7 0
15227 P UDWL
15228 C4-98, C5-26 1 FSTMD cond 1100 1x00 Rn Fd 1010 offset
15229 C4-98, C5-28 2 FSTMDIA cond 1100 1x10 Rn Fd 1010 offset
15230 C4-98, C5-30 3 FSTMDDB cond 1101 0x10 Rn Fd 1010 offset
15232 C4-40, C5-26 1 FLDMD cond 1100 1x01 Rn Fd 1010 offset
15233 C4-40, C5-26 2 FLDMIAD cond 1100 1x11 Rn Fd 1010 offset
15234 C4-40, C5-26 3 FLDMDBD cond 1101 0x11 Rn Fd 1010 offset
15236 Regs transferred: F(Fd:D) .. F(Fd:d + offset)
15237 offset must not imply a reg > 15
15238 IA/DB: Rn is changed by (4 x # regs transferred)
15240 case coding:
15241 1 at-Rn (access at Rn)
15242 2 ia-Rn (access at Rn, then Rn += 4n)
15243 3 db-Rn (Rn -= 4n, then access at Rn)
15245 if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
15246 && INSN(11,8) == BITS4(1,0,1,0)) {
15247 UInt bP = (insn28 >> 24) & 1;
15248 UInt bU = (insn28 >> 23) & 1;
15249 UInt bW = (insn28 >> 21) & 1;
15250 UInt bL = (insn28 >> 20) & 1;
15251 UInt bD = (insn28 >> 22) & 1;
15252 UInt offset = (insn28 >> 0) & 0xFF;
15253 UInt rN = INSN(19,16);
15254 UInt fD = (INSN(15,12) << 1) | bD;
15255 UInt nRegs = offset;
15256 UInt summary = 0;
15257 Int i;
15259 /**/ if (bP == 0 && bU == 1 && bW == 0) {
15260 summary = 1;
15262 else if (bP == 0 && bU == 1 && bW == 1) {
15263 summary = 2;
15265 else if (bP == 1 && bU == 0 && bW == 1) {
15266 summary = 3;
15268 else goto after_vfp_fldms_fstms;
15270 /* no writebacks to r15 allowed. No use of r15 in thumb mode. */
15271 if (rN == 15 && (summary == 2 || summary == 3 || isT))
15272 goto after_vfp_fldms_fstms;
15274 /* offset must specify at least one register */
15275 if (offset < 1)
15276 goto after_vfp_fldms_fstms;
15278 /* can't transfer regs after S31 */
15279 if (fD + nRegs - 1 >= 32)
15280 goto after_vfp_fldms_fstms;
15282 /* Now, we can't do a conditional load or store, since that very
15283 likely will generate an exception. So we have to take a side
15284 exit at this point if the condition is false. */
15285 if (condT != IRTemp_INVALID) {
15286 if (isT)
15287 mk_skip_over_T32_if_cond_is_false( condT );
15288 else
15289 mk_skip_over_A32_if_cond_is_false( condT );
15290 condT = IRTemp_INVALID;
15292 /* Ok, now we're unconditional. Do the load or store. */
15294 /* get the old Rn value */
15295 IRTemp rnT = newTemp(Ity_I32);
15296 assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
15297 rN == 15));
15299 /* make a new value for Rn, post-insn */
15300 IRTemp rnTnew = IRTemp_INVALID;
15301 if (summary == 2 || summary == 3) {
15302 rnTnew = newTemp(Ity_I32);
15303 assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
15304 mkexpr(rnT),
15305 mkU32(4 * nRegs)));
15308 /* decide on the base transfer address */
15309 IRTemp taT = newTemp(Ity_I32);
15310 assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
15312 /* update Rn if necessary -- in case 3, we're moving it down, so
15313 update before any memory reference, in order to keep Memcheck
15314 and V's stack-extending logic (on linux) happy */
15315 if (summary == 3) {
15316 if (isT)
15317 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
15318 else
15319 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
15322 /* generate the transfers */
15323 for (i = 0; i < nRegs; i++) {
15324 IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
15325 if (bL) {
15326 putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
15327 } else {
15328 storeLE(addr, getFReg(fD + i));
15332 /* update Rn if necessary -- in case 2, we're moving it up, so
15333 update after any memory reference, in order to keep Memcheck
15334 and V's stack-extending logic (on linux) happy */
15335 if (summary == 2) {
15336 if (isT)
15337 putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
15338 else
15339 putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
15342 const HChar* nm = bL==1 ? "ld" : "st";
15343 switch (summary) {
15344 case 1: DIP("f%sms%s r%u, {s%u-s%u}\n",
15345 nm, nCC(conq), rN, fD, fD + nRegs - 1);
15346 break;
15347 case 2: DIP("f%smias%s r%u!, {s%u-s%u}\n",
15348 nm, nCC(conq), rN, fD, fD + nRegs - 1);
15349 break;
15350 case 3: DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
15351 nm, nCC(conq), rN, fD, fD + nRegs - 1);
15352 break;
15353 default: vassert(0);
15356 goto decode_success_vfp;
15357 /* FIXME alignment constraints? */
15360 after_vfp_fldms_fstms:
15362 /* --------------------- fmsr, fmrs --------------------- */
15363 if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
15364 && BITS4(1,0,1,0) == INSN(11,8)
15365 && BITS4(0,0,0,0) == INSN(3,0)
15366 && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
15367 UInt rD = INSN(15,12);
15368 UInt b7 = (insn28 >> 7) & 1;
15369 UInt fN = (INSN(19,16) << 1) | b7;
15370 UInt b20 = (insn28 >> 20) & 1;
15371 if (rD == 15) {
15372 /* fall through */
15373 /* Let's assume that no sane person would want to do
15374 floating-point transfers to or from the program counter,
15375 and simply decline to decode the instruction. The ARM ARM
15376 doesn't seem to explicitly disallow this case, though. */
15377 } else {
15378 if (b20) {
15379 IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
15380 if (isT)
15381 putIRegT(rD, res, condT);
15382 else
15383 putIRegA(rD, res, condT, Ijk_Boring);
15384 DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
15385 } else {
15386 putFReg(fN, unop(Iop_ReinterpI32asF32,
15387 isT ? getIRegT(rD) : getIRegA(rD)),
15388 condT);
15389 DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
15391 goto decode_success_vfp;
15393 /* fall through */
15396 /* --------------------- f{ld,st}s --------------------- */
15397 // FLDS, FSTS
15398 if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
15399 && BITS4(1,0,1,0) == INSN(11,8)) {
15400 UInt bD = (insn28 >> 22) & 1;
15401 UInt fD = (INSN(15,12) << 1) | bD;
15402 UInt rN = INSN(19,16);
15403 UInt offset = (insn28 & 0xFF) << 2;
15404 UInt bU = (insn28 >> 23) & 1; /* 1: +offset 0: -offset */
15405 UInt bL = (insn28 >> 20) & 1; /* 1: load 0: store */
15406 /* make unconditional */
15407 if (condT != IRTemp_INVALID) {
15408 if (isT)
15409 mk_skip_over_T32_if_cond_is_false( condT );
15410 else
15411 mk_skip_over_A32_if_cond_is_false( condT );
15412 condT = IRTemp_INVALID;
15414 IRTemp ea = newTemp(Ity_I32);
15415 assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
15416 align4if(isT ? getIRegT(rN) : getIRegA(rN),
15417 rN == 15),
15418 mkU32(offset)));
15419 if (bL) {
15420 putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
15421 } else {
15422 storeLE(mkexpr(ea), getFReg(fD));
15424 DIP("f%ss%s s%u, [r%u, %c#%u]\n",
15425 bL ? "ld" : "st", nCC(conq), fD, rN,
15426 bU ? '+' : '-', offset);
15427 goto decode_success_vfp;
15430 /* --------------------- dp insns (F) --------------------- */
15431 if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
15432 && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
15433 && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
15434 UInt bM = (insn28 >> 5) & 1;
15435 UInt bD = (insn28 >> 22) & 1;
15436 UInt bN = (insn28 >> 7) & 1;
15437 UInt fM = (INSN(3,0) << 1) | bM; /* argR */
15438 UInt fD = (INSN(15,12) << 1) | bD; /* dst/acc */
15439 UInt fN = (INSN(19,16) << 1) | bN; /* argL */
15440 UInt bP = (insn28 >> 23) & 1;
15441 UInt bQ = (insn28 >> 21) & 1;
15442 UInt bR = (insn28 >> 20) & 1;
15443 UInt bS = (insn28 >> 6) & 1;
15444 UInt opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
15445 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15446 switch (opc) {
15447 case BITS4(0,0,0,0): /* MAC: d + n * m */
15448 putFReg(fD, triop(Iop_AddF32, rm,
15449 getFReg(fD),
15450 triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
15451 condT);
15452 DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15453 goto decode_success_vfp;
15454 case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
15455 putFReg(fD, triop(Iop_AddF32, rm,
15456 getFReg(fD),
15457 unop(Iop_NegF32,
15458 triop(Iop_MulF32, rm, getFReg(fN),
15459 getFReg(fM)))),
15460 condT);
15461 DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15462 goto decode_success_vfp;
15463 case BITS4(0,0,1,0): /* MSC: - d + n * m */
15464 putFReg(fD, triop(Iop_AddF32, rm,
15465 unop(Iop_NegF32, getFReg(fD)),
15466 triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
15467 condT);
15468 DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15469 goto decode_success_vfp;
15470 case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
15471 putFReg(fD, triop(Iop_AddF32, rm,
15472 unop(Iop_NegF32, getFReg(fD)),
15473 unop(Iop_NegF32,
15474 triop(Iop_MulF32, rm,
15475 getFReg(fN),
15476 getFReg(fM)))),
15477 condT);
15478 DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15479 goto decode_success_vfp;
15480 case BITS4(0,1,0,0): /* MUL: n * m */
15481 putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
15482 condT);
15483 DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15484 goto decode_success_vfp;
15485 case BITS4(0,1,0,1): /* NMUL: - n * m */
15486 putFReg(fD, unop(Iop_NegF32,
15487 triop(Iop_MulF32, rm, getFReg(fN),
15488 getFReg(fM))),
15489 condT);
15490 DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15491 goto decode_success_vfp;
15492 case BITS4(0,1,1,0): /* ADD: n + m */
15493 putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
15494 condT);
15495 DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15496 goto decode_success_vfp;
15497 case BITS4(0,1,1,1): /* SUB: n - m */
15498 putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
15499 condT);
15500 DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15501 goto decode_success_vfp;
15502 case BITS4(1,0,0,0): /* DIV: n / m */
15503 putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
15504 condT);
15505 DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15506 goto decode_success_vfp;
15507 case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
15508 /* XXXROUNDINGFIXME look up ARM reference for fused
15509 multiply-add rounding */
15510 putFReg(fD, triop(Iop_AddF32, rm,
15511 unop(Iop_NegF32, getFReg(fD)),
15512 triop(Iop_MulF32, rm,
15513 getFReg(fN),
15514 getFReg(fM))),
15515 condT);
15516 DIP("vfnmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15517 goto decode_success_vfp;
15518 case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
15519 /* XXXROUNDINGFIXME look up ARM reference for fused
15520 multiply-add rounding */
15521 putFReg(fD, triop(Iop_AddF32, rm,
15522 unop(Iop_NegF32, getFReg(fD)),
15523 triop(Iop_MulF32, rm,
15524 unop(Iop_NegF32, getFReg(fN)),
15525 getFReg(fM))),
15526 condT);
15527 DIP("vfnmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15528 goto decode_success_vfp;
15529 case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
15530 /* XXXROUNDINGFIXME look up ARM reference for fused
15531 multiply-add rounding */
15532 putFReg(fD, triop(Iop_AddF32, rm,
15533 getFReg(fD),
15534 triop(Iop_MulF32, rm, getFReg(fN),
15535 getFReg(fM))),
15536 condT);
15537 DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15538 goto decode_success_vfp;
15539 case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
15540 /* XXXROUNDINGFIXME look up ARM reference for fused
15541 multiply-add rounding */
15542 putFReg(fD, triop(Iop_AddF32, rm,
15543 getFReg(fD),
15544 triop(Iop_MulF32, rm,
15545 unop(Iop_NegF32, getFReg(fN)),
15546 getFReg(fM))),
15547 condT);
15548 DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15549 goto decode_success_vfp;
15550 default:
15551 break;
15555 /* --------------------- compares (S) --------------------- */
15556 /* 31 27 23 19 15 11 7 3
15557 28 24 20 16 12 8 4 0
15558 FCMPS cond 1110 1D11 0100 Fd 1010 01M0 Fm
15559 FCMPES cond 1110 1D11 0100 Fd 1010 11M0 Fm
15560 FCMPZS cond 1110 1D11 0101 Fd 1010 0100 0000
15561 FCMPZED cond 1110 1D11 0101 Fd 1010 1100 0000
15564 Z=0 Compare Fd:D vs Fm:M and set FPSCR 31:28 accordingly
15565 Z=1 Compare Fd:D vs zero
15567 N=1 generates Invalid Operation exn if either arg is any kind of NaN
15568 N=0 generates Invalid Operation exn if either arg is a signalling NaN
15569 (Not that we pay any attention to N here)
15571 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15572 && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15573 && BITS4(1,0,1,0) == INSN(11,8)
15574 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15575 UInt bZ = (insn28 >> 16) & 1;
15576 UInt bN = (insn28 >> 7) & 1;
15577 UInt bD = (insn28 >> 22) & 1;
15578 UInt bM = (insn28 >> 5) & 1;
15579 UInt fD = (INSN(15,12) << 1) | bD;
15580 UInt fM = (INSN(3,0) << 1) | bM;
15581 if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
15582 /* does not decode; fall through */
15583 } else {
15584 IRTemp argL = newTemp(Ity_F64);
15585 IRTemp argR = newTemp(Ity_F64);
15586 IRTemp irRes = newTemp(Ity_I32);
15588 assign(argL, unop(Iop_F32toF64, getFReg(fD)));
15589 assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
15590 : unop(Iop_F32toF64, getFReg(fM)));
15591 assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
15593 IRTemp nzcv = IRTemp_INVALID;
15594 IRTemp oldFPSCR = newTemp(Ity_I32);
15595 IRTemp newFPSCR = newTemp(Ity_I32);
15597 /* This is where the fun starts. We have to convert 'irRes'
15598 from an IR-convention return result (IRCmpF64Result) to an
15599 ARM-encoded (N,Z,C,V) group. The final result is in the
15600 bottom 4 bits of 'nzcv'. */
15601 /* Map compare result from IR to ARM(nzcv) */
15603 FP cmp result | IR | ARM(nzcv)
15604 --------------------------------
15605 UN 0x45 0011
15606 LT 0x01 1000
15607 GT 0x00 0010
15608 EQ 0x40 0110
15610 nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15612 /* And update FPSCR accordingly */
15613 assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
15614 assign(newFPSCR,
15615 binop(Iop_Or32,
15616 binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
15617 binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
15619 putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
15621 if (bZ) {
15622 DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
15623 } else {
15624 DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
15625 nCC(conq), fD, fM);
15627 goto decode_success_vfp;
15629 /* fall through */
15632 /* --------------------- unary (S) --------------------- */
15633 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15634 && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15635 && BITS4(1,0,1,0) == INSN(11,8)
15636 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15637 UInt bD = (insn28 >> 22) & 1;
15638 UInt bM = (insn28 >> 5) & 1;
15639 UInt fD = (INSN(15,12) << 1) | bD;
15640 UInt fM = (INSN(3,0) << 1) | bM;
15641 UInt b16 = (insn28 >> 16) & 1;
15642 UInt b7 = (insn28 >> 7) & 1;
15643 /**/ if (b16 == 0 && b7 == 0) {
15644 // FCPYS
15645 putFReg(fD, getFReg(fM), condT);
15646 DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
15647 goto decode_success_vfp;
15649 else if (b16 == 0 && b7 == 1) {
15650 // FABSS
15651 putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
15652 DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
15653 goto decode_success_vfp;
15655 else if (b16 == 1 && b7 == 0) {
15656 // FNEGS
15657 putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
15658 DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
15659 goto decode_success_vfp;
15661 else if (b16 == 1 && b7 == 1) {
15662 // FSQRTS
15663 IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15664 putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
15665 DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
15666 goto decode_success_vfp;
15668 else
15669 vassert(0);
15671 /* fall through */
15674 /* ----------------- I <-> S conversions ----------------- */
15676 // F{S,U}ITOS fD, fM
15677 /* These are more complex than FSITOD/FUITOD. In the D cases, a 32
15678 bit int will always fit within the 53 bit mantissa, so there's
15679 no possibility of a loss of precision, but that's obviously not
15680 the case here. Hence this case possibly requires rounding, and
15681 so it drags in the current rounding mode. */
15682 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15683 && BITS4(1,0,0,0) == INSN(19,16)
15684 && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
15685 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15686 UInt bM = (insn28 >> 5) & 1;
15687 UInt bD = (insn28 >> 22) & 1;
15688 UInt fM = (INSN(3,0) << 1) | bM;
15689 UInt fD = (INSN(15,12) << 1) | bD;
15690 UInt syned = (insn28 >> 7) & 1;
15691 IRTemp rmode = newTemp(Ity_I32);
15692 assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
15693 if (syned) {
15694 // FSITOS
15695 putFReg(fD, binop(Iop_F64toF32,
15696 mkexpr(rmode),
15697 unop(Iop_I32StoF64,
15698 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
15699 condT);
15700 DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
15701 } else {
15702 // FUITOS
15703 putFReg(fD, binop(Iop_F64toF32,
15704 mkexpr(rmode),
15705 unop(Iop_I32UtoF64,
15706 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
15707 condT);
15708 DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
15710 goto decode_success_vfp;
15713 // FTO{S,U}IS fD, fM
15714 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15715 && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15716 && BITS4(1,0,1,0) == INSN(11,8)
15717 && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15718 UInt bM = (insn28 >> 5) & 1;
15719 UInt bD = (insn28 >> 22) & 1;
15720 UInt fD = (INSN(15,12) << 1) | bD;
15721 UInt fM = (INSN(3,0) << 1) | bM;
15722 UInt bZ = (insn28 >> 7) & 1;
15723 UInt syned = (insn28 >> 16) & 1;
15724 IRTemp rmode = newTemp(Ity_I32);
15725 assign(rmode, bZ ? mkU32(Irrm_ZERO)
15726 : mkexpr(mk_get_IR_rounding_mode()));
15727 if (syned) {
15728 // FTOSIS
15729 putFReg(fD, unop(Iop_ReinterpI32asF32,
15730 binop(Iop_F64toI32S, mkexpr(rmode),
15731 unop(Iop_F32toF64, getFReg(fM)))),
15732 condT);
15733 DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
15734 nCC(conq), fD, fM);
15735 goto decode_success_vfp;
15736 } else {
15737 // FTOUIS
15738 putFReg(fD, unop(Iop_ReinterpI32asF32,
15739 binop(Iop_F64toI32U, mkexpr(rmode),
15740 unop(Iop_F32toF64, getFReg(fM)))),
15741 condT);
15742 DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
15743 nCC(conq), fD, fM);
15744 goto decode_success_vfp;
15748 /* ----------------- S <-> D conversions ----------------- */
15750 // FCVTDS
15751 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15752 && BITS4(0,1,1,1) == INSN(19,16)
15753 && BITS4(1,0,1,0) == INSN(11,8)
15754 && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
15755 UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15756 UInt bM = (insn28 >> 5) & 1;
15757 UInt fM = (INSN(3,0) << 1) | bM;
15758 putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
15759 DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
15760 goto decode_success_vfp;
15763 // FCVTSD
15764 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15765 && BITS4(0,1,1,1) == INSN(19,16)
15766 && BITS4(1,0,1,1) == INSN(11,8)
15767 && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
15768 UInt bD = (insn28 >> 22) & 1;
15769 UInt fD = (INSN(15,12) << 1) | bD;
15770 UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15771 IRTemp rmode = newTemp(Ity_I32);
15772 assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
15773 putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
15774 condT);
15775 DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
15776 goto decode_success_vfp;
15779 /* --------------- VCVT fixed<->floating, VFP --------------- */
15780 /* 31 27 23 19 15 11 7 3
15781 28 24 20 16 12 8 4 0
15783 cond 1110 1D11 1p1U Vd 101f x1i0 imm4
15785 VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
15786 VCVT<c>.<Td>.F32 <Dd>, <Dd>, #fbits
15787 VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
15788 VCVT<c>.F32.<Td> <Dd>, <Dd>, #fbits
15789 are of this form. We only handle a subset of the cases though.
15791 if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15792 && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
15793 && BITS3(1,0,1) == INSN(11,9)
15794 && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
15795 UInt bD = INSN(22,22);
15796 UInt bOP = INSN(18,18);
15797 UInt bU = INSN(16,16);
15798 UInt Vd = INSN(15,12);
15799 UInt bSF = INSN(8,8);
15800 UInt bSX = INSN(7,7);
15801 UInt bI = INSN(5,5);
15802 UInt imm4 = INSN(3,0);
15803 Bool to_fixed = bOP == 1;
15804 Bool dp_op = bSF == 1;
15805 Bool unsyned = bU == 1;
15806 UInt size = bSX == 0 ? 16 : 32;
15807 Int frac_bits = size - ((imm4 << 1) | bI);
15808 UInt d = dp_op ? ((bD << 4) | Vd) : ((Vd << 1) | bD);
15810 IRExpr* rm = mkU32(Irrm_NEAREST);
15811 IRTemp scale = newTemp(Ity_F64);
15812 assign(scale, unop(Iop_I32UtoF64, mkU32( ((UInt)1) << (frac_bits-1) )));
15814 if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
15815 && size == 32) {
15816 /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
15817 /* This generates really horrible code. We could potentially
15818 do much better. */
15819 IRTemp rmode = newTemp(Ity_I32);
15820 assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
15821 IRTemp src32 = newTemp(Ity_I32);
15822 assign(src32, unop(Iop_ReinterpF32asI32, getFReg(d)));
15823 IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
15824 mkexpr(src32 ) );
15825 IRExpr* resF64 = triop(Iop_DivF64,
15826 rm, as_F64,
15827 triop(Iop_AddF64, rm, mkexpr(scale),
15828 mkexpr(scale)));
15829 IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
15830 putFReg(d, resF32, condT);
15831 DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
15832 unsyned ? 'u' : 's', d, d, frac_bits);
15833 goto decode_success_vfp;
15835 if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
15836 && size == 32) {
15837 /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
15838 /* This generates really horrible code. We could potentially
15839 do much better. */
15840 IRTemp src32 = newTemp(Ity_I32);
15841 assign(src32, unop(Iop_64to32, getDRegI64(d)));
15842 IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
15843 mkexpr(src32 ) );
15844 IRExpr* resF64 = triop(Iop_DivF64,
15845 rm, as_F64,
15846 triop(Iop_AddF64, rm, mkexpr(scale),
15847 mkexpr(scale)));
15848 putDReg(d, resF64, condT);
15849 DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
15850 unsyned ? 'u' : 's', d, d, frac_bits);
15851 goto decode_success_vfp;
15853 if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
15854 && size == 32) {
15855 /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
15856 IRTemp srcF64 = newTemp(Ity_F64);
15857 assign(srcF64, getDReg(d));
15858 IRTemp scaledF64 = newTemp(Ity_F64);
15859 assign(scaledF64, triop(Iop_MulF64,
15860 rm, mkexpr(srcF64),
15861 triop(Iop_AddF64, rm, mkexpr(scale),
15862 mkexpr(scale))));
15863 IRTemp rmode = newTemp(Ity_I32);
15864 assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
15865 IRTemp asI32 = newTemp(Ity_I32);
15866 assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
15867 mkexpr(rmode), mkexpr(scaledF64)));
15868 putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
15869 mkexpr(asI32)), condT);
15871 DIP("vcvt.%c32.f64, d%u, d%u, #%d\n",
15872 unsyned ? 'u' : 's', d, d, frac_bits);
15873 goto decode_success_vfp;
15875 if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && !dp_op
15876 && size == 32) {
15877 /* VCVT.{S,U}32.F32 S[d], S[d], #frac_bits */
15878 IRTemp srcF32 = newTemp(Ity_F32);
15879 assign(srcF32, getFReg(d));
15880 IRTemp scaledF64 = newTemp(Ity_F64);
15881 assign(scaledF64, triop(Iop_MulF64,
15882 rm, unop(Iop_F32toF64, mkexpr(srcF32)),
15883 triop(Iop_AddF64, rm, mkexpr(scale),
15884 mkexpr(scale))));
15885 IRTemp rmode = newTemp(Ity_I32);
15886 assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
15887 IRTemp asI32 = newTemp(Ity_I32);
15888 assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
15889 mkexpr(rmode), mkexpr(scaledF64)));
15890 putFReg(d, unop(Iop_ReinterpI32asF32, mkexpr(asI32)), condT);
15891 DIP("vcvt.%c32.f32, d%u, d%u, #%d\n",
15892 unsyned ? 'u' : 's', d, d, frac_bits);
15893 goto decode_success_vfp;
15895 /* fall through */
15898 /* FAILURE */
15899 return False;
15901 decode_success_vfp:
15902 /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
15903 assert that we aren't accepting, in this fn, insns that actually
15904 should be handled somewhere else. */
15905 vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
15906 return True;
15908 # undef INSN
15912 /*------------------------------------------------------------*/
15913 /*--- Instructions in NV (never) space ---*/
15914 /*------------------------------------------------------------*/
15916 /* ARM only */
15917 /* Translate a NV space instruction. If successful, returns True and
15918 *dres may or may not be updated. If failure, returns False and
15919 doesn't change *dres nor create any IR.
15921 Note that all NEON instructions (in ARM mode) up to and including
15922 ARMv7, but not later, are handled through here, since they are all
15923 in NV space.
/* Decode one NV-space (cond == 1111) ARM instruction.  Returns True iff
   the insn was recognised; on success, IR may have been emitted into the
   current block and *dres may have been updated (see the contract in the
   comment preceding this function). */
15925 static Bool decode_NV_instruction_ARMv7_and_below
15926 ( /*MOD*/DisResult* dres,
15927 const VexArchInfo* archinfo,
15928 UInt insn )
15930 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
15931 # define INSN_COND SLICE_UInt(insn, 31, 28)
15933 HChar dis_buf[128];
15935 // Should only be called for NV instructions
15936 vassert(BITS4(1,1,1,1) == INSN_COND);
15938 /* ------------------------ pld{w} ------------------------ */
/* PLD/PLDW (immediate offset).  Prefetches are hints: no IR is
   generated, only disassembly text. */
15939 if (BITS8(0,1,0,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
15940 && BITS4(1,1,1,1) == INSN(15,12)) {
15941 UInt rN = INSN(19,16);
15942 UInt imm12 = INSN(11,0);
15943 UInt bU = INSN(23,23);
/* Per the format char below: R==1 prints "pld ", R==0 prints "pldw". */
15944 UInt bR = INSN(22,22);
15945 DIP("pld%c [r%u, #%c%u]\n", bR ? ' ' : 'w', rN, bU ? '+' : '-', imm12);
15946 return True;
/* PLD/PLDW (register offset): EA is rN +/- (rM shifted).  Again a pure
   hint -- the EA is only bound to a temp for sanity-checking; iropt will
   remove it. */
15949 if (BITS8(0,1,1,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
15950 && BITS4(1,1,1,1) == INSN(15,12)
15951 && 0 == INSN(4,4)) {
15952 UInt rN = INSN(19,16);
15953 UInt rM = INSN(3,0);
15954 UInt imm5 = INSN(11,7);
15955 UInt sh2 = INSN(6,5);
15956 UInt bU = INSN(23,23);
15957 UInt bR = INSN(22,22);
/* Decline rM==15, and rN==15 when R is clear; those cases fall through
   as undecoded. */
15958 if (rM != 15 && (rN != 15 || bR)) {
15959 IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
15960 sh2, imm5, dis_buf);
15961 IRTemp eaT = newTemp(Ity_I32);
15962 /* Bind eaE to a temp merely for debugging-vex purposes, so we
15963 can check it's a plausible decoding. It will get removed
15964 by iropt a little later on. */
15965 vassert(eaE);
15966 assign(eaT, eaE);
15967 DIP("pld%c %s\n", bR ? ' ' : 'w', dis_buf);
15968 return True;
15970 /* fall through */
15973 /* ------------------------ pli ------------------------ */
/* PLI (immediate): instruction-prefetch hint; no IR generated. */
15974 if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
15975 && BITS4(1,1,1,1) == INSN(15,12)) {
15976 UInt rN = INSN(19,16);
15977 UInt imm12 = INSN(11,0);
15978 UInt bU = INSN(23,23);
15979 DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
15980 return True;
15983 /* --------------------- Interworking branches --------------------- */
15985 // BLX (1), viz, unconditional branch and link to R15+simm24
15986 // and set CPSR.T = 1, that is, switch to Thumb mode
15987 if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
15988 UInt bitH = INSN(24,24);
/* Sign-extend the 24-bit immediate via shift-up/arith-shift-down,
   then scale by 4 and fold in the H bit as the halfword offset. */
15989 UInt uimm24 = INSN(23,0); uimm24 <<= 8;
15990 Int simm24 = (Int)uimm24; simm24 >>= 8;
15991 simm24 = (((UInt)simm24) << 2) + (bitH << 1);
15992 /* Now this is a bit tricky. Since we're decoding an ARM insn,
15993 it is implies that CPSR.T == 0. Hence the current insn's
15994 address is guaranteed to be of the form X--(30)--X00. So, no
15995 need to mask any bits off it. But need to set the lowest bit
15996 to 1 to denote we're in Thumb mode after this, since
15997 guest_R15T has CPSR.T as the lowest bit. And we can't chase
15998 into the call, so end the block at this point. */
15999 UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
/* Link register gets the address of the following insn. */
16000 putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
16001 IRTemp_INVALID/*because AL*/, Ijk_Boring );
16002 llPutIReg(15, mkU32(dst));
16003 dres->jk_StopHere = Ijk_Call;
16004 dres->whatNext = Dis_StopHere;
16005 DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
16006 return True;
16009 /* ------------------- v7 barrier insns ------------------- */
/* ISB, and every DSB/DMB domain/type variant, are all modelled as a
   single IR memory fence. */
16010 switch (insn) {
16011 case 0xF57FF06F: /* ISB */
16012 stmt( IRStmt_MBE(Imbe_Fence) );
16013 DIP("ISB\n");
16014 return True;
16015 case 0xF57FF04F: /* DSB sy */
16016 case 0xF57FF04E: /* DSB st */
16017 case 0xF57FF04B: /* DSB ish */
16018 case 0xF57FF04A: /* DSB ishst */
16019 case 0xF57FF047: /* DSB nsh */
16020 case 0xF57FF046: /* DSB nshst */
16021 case 0xF57FF043: /* DSB osh */
16022 case 0xF57FF042: /* DSB oshst */
16023 stmt( IRStmt_MBE(Imbe_Fence) );
16024 DIP("DSB\n");
16025 return True;
16026 case 0xF57FF05F: /* DMB sy */
16027 case 0xF57FF05E: /* DMB st */
16028 case 0xF57FF05B: /* DMB ish */
16029 case 0xF57FF05A: /* DMB ishst */
16030 case 0xF57FF057: /* DMB nsh */
16031 case 0xF57FF056: /* DMB nshst */
16032 case 0xF57FF053: /* DMB osh */
16033 case 0xF57FF052: /* DMB oshst */
16034 stmt( IRStmt_MBE(Imbe_Fence) );
16035 DIP("DMB\n");
16036 return True;
16037 default:
16038 break;
16041 /* ------------------- CLREX ------------------ */
16042 if (insn == 0xF57FF01F) {
16043 /* AFAICS, this simply cancels a (all?) reservations made by a
16044 (any?) preceding LDREX(es). Arrange to hand it through to
16045 the back end. */
16046 stmt( IRStmt_MBE(Imbe_CancelReservation) );
16047 DIP("clrex\n");
16048 return True;
16051 /* ------------------- NEON ------------------- */
/* Anything else in NV space may be a NEON insn (ARMv7 and below keep
   all NEON encodings here); delegate, unconditionally (no guard temp,
   ARM mode). */
16052 if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
16053 Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
16054 dres, insn, IRTemp_INVALID/*unconditional*/,
16055 False/*!isT*/
16057 if (ok_neon)
16058 return True;
16061 // unrecognised
16062 return False;
16064 # undef INSN_COND
16065 # undef INSN
16069 /*------------------------------------------------------------*/
16070 /*--- Disassemble a single ARM instruction ---*/
16071 /*------------------------------------------------------------*/
16073 /* Disassemble a single ARM instruction into IR. The instruction is
16074 located in host memory at guest_instr, and has (decoded) guest IP
16075 of guest_R15_curr_instr_notENC, which will have been set before the
16076 call here. */
16078 static
16079 DisResult disInstr_ARM_WRK (
16080 const UChar* guest_instr,
16081 const VexArchInfo* archinfo,
16082 const VexAbiInfo* abiinfo,
16083 Bool sigill_diag
16086 // A macro to fish bits out of 'insn'.
16087 # define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
16088 # define INSN_COND SLICE_UInt(insn, 31, 28)
16090 DisResult dres;
16091 UInt insn;
16092 IRTemp condT; /* :: Ity_I32 */
16093 UInt summary;
16094 HChar dis_buf[128]; // big enough to hold LDMIA etc text
16096 /* Set result defaults. */
16097 dres.whatNext = Dis_Continue;
16098 dres.len = 4;
16099 dres.jk_StopHere = Ijk_INVALID;
16100 dres.hint = Dis_HintNone;
16102 /* Set default actions for post-insn handling of writes to r15, if
16103 required. */
16104 r15written = False;
16105 r15guard = IRTemp_INVALID; /* unconditional */
16106 r15kind = Ijk_Boring;
16108 /* At least this is simple on ARM: insns are all 4 bytes long, and
16109 4-aligned. So just fish the whole thing out of memory right now
16110 and have done. */
16111 insn = getUIntLittleEndianly( guest_instr );
16113 if (0) vex_printf("insn: 0x%x\n", insn);
16115 DIP("\t(arm) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
16117 vassert(0 == (guest_R15_curr_instr_notENC & 3));
16119 /* ----------------------------------------------------------- */
16121 /* Spot "Special" instructions (see comment at top of file). */
16123 const UChar* code = guest_instr;
16124 /* Spot the 16-byte preamble:
16126 e1a0c1ec mov r12, r12, ROR #3
16127 e1a0c6ec mov r12, r12, ROR #13
16128 e1a0ceec mov r12, r12, ROR #29
16129 e1a0c9ec mov r12, r12, ROR #19
16131 UInt word1 = 0xE1A0C1EC;
16132 UInt word2 = 0xE1A0C6EC;
16133 UInt word3 = 0xE1A0CEEC;
16134 UInt word4 = 0xE1A0C9EC;
16135 if (getUIntLittleEndianly(code+ 0) == word1 &&
16136 getUIntLittleEndianly(code+ 4) == word2 &&
16137 getUIntLittleEndianly(code+ 8) == word3 &&
16138 getUIntLittleEndianly(code+12) == word4) {
16139 /* Got a "Special" instruction preamble. Which one is it? */
16140 if (getUIntLittleEndianly(code+16) == 0xE18AA00A
16141 /* orr r10,r10,r10 */) {
16142 /* R3 = client_request ( R4 ) */
16143 DIP("r3 = client_request ( %%r4 )\n");
16144 llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
16145 dres.jk_StopHere = Ijk_ClientReq;
16146 dres.whatNext = Dis_StopHere;
16147 goto decode_success;
16149 else
16150 if (getUIntLittleEndianly(code+16) == 0xE18BB00B
16151 /* orr r11,r11,r11 */) {
16152 /* R3 = guest_NRADDR */
16153 DIP("r3 = guest_NRADDR\n");
16154 dres.len = 20;
16155 llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
16156 goto decode_success;
16158 else
16159 if (getUIntLittleEndianly(code+16) == 0xE18CC00C
16160 /* orr r12,r12,r12 */) {
16161 /* branch-and-link-to-noredir R4 */
16162 DIP("branch-and-link-to-noredir r4\n");
16163 llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
16164 llPutIReg(15, llGetIReg(4));
16165 dres.jk_StopHere = Ijk_NoRedir;
16166 dres.whatNext = Dis_StopHere;
16167 goto decode_success;
16169 else
16170 if (getUIntLittleEndianly(code+16) == 0xE1899009
16171 /* orr r9,r9,r9 */) {
16172 /* IR injection */
16173 DIP("IR injection\n");
16174 vex_inject_ir(irsb, Iend_LE);
16175 // Invalidate the current insn. The reason is that the IRop we're
16176 // injecting here can change. In which case the translation has to
16177 // be redone. For ease of handling, we simply invalidate all the
16178 // time.
16179 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
16180 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(20)));
16181 llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
16182 dres.whatNext = Dis_StopHere;
16183 dres.jk_StopHere = Ijk_InvalICache;
16184 goto decode_success;
16186 /* We don't know what it is. Set opc1/opc2 so decode_failure
16187 can print the insn following the Special-insn preamble. */
16188 insn = getUIntLittleEndianly(code+16);
16189 goto decode_failure;
16190 /*NOTREACHED*/
16195 /* ----------------------------------------------------------- */
16197 /* Main ARM instruction decoder starts here. */
16199 /* Deal with the condition. Strategy is to merely generate a
16200 condition temporary at this point (or IRTemp_INVALID, meaning
16201 unconditional). We leave it to lower-level instruction decoders
16202 to decide whether they can generate straight-line code, or
16203 whether they must generate a side exit before the instruction.
16204 condT :: Ity_I32 and is always either zero or one. */
16205 condT = IRTemp_INVALID;
16206 switch ( (ARMCondcode)INSN_COND ) {
16207 case ARMCondNV: {
16208 // Illegal instruction prior to v5 (see ARM ARM A3-5), but
16209 // some cases are acceptable
16210 Bool ok
16211 = decode_NV_instruction_ARMv7_and_below(&dres, archinfo, insn);
16212 if (ok)
16213 goto decode_success;
16214 else
16215 goto after_v7_decoder;
16217 case ARMCondAL: // Always executed
16218 break;
16219 case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
16220 case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
16221 case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
16222 case ARMCondGT: case ARMCondLE:
16223 condT = newTemp(Ity_I32);
16224 assign( condT, mk_armg_calculate_condition( INSN_COND ));
16225 break;
16228 /* ----------------------------------------------------------- */
16229 /* -- ARMv5 integer instructions -- */
16230 /* ----------------------------------------------------------- */
16232 /* ---------------- Data processing ops ------------------- */
16234 if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
16235 && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
16236 IRTemp shop = IRTemp_INVALID; /* shifter operand */
16237 IRTemp shco = IRTemp_INVALID; /* shifter carry out */
16238 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16239 UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16240 UInt bitS = (insn >> 20) & 1; /* 20:20 */
16241 IRTemp rNt = IRTemp_INVALID;
16242 IRTemp res = IRTemp_INVALID;
16243 IRTemp oldV = IRTemp_INVALID;
16244 IRTemp oldC = IRTemp_INVALID;
16245 const HChar* name = NULL;
16246 IROp op = Iop_INVALID;
16247 Bool ok;
16249 switch (INSN(24,21)) {
16251 /* --------- ADD, SUB, AND, OR --------- */
16252 case BITS4(0,1,0,0): /* ADD: Rd = Rn + shifter_operand */
16253 name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
16254 case BITS4(0,0,1,0): /* SUB: Rd = Rn - shifter_operand */
16255 name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
16256 case BITS4(0,0,1,1): /* RSB: Rd = shifter_operand - Rn */
16257 name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
16258 case BITS4(0,0,0,0): /* AND: Rd = Rn & shifter_operand */
16259 name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
16260 case BITS4(1,1,0,0): /* OR: Rd = Rn | shifter_operand */
16261 name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
16262 case BITS4(0,0,0,1): /* EOR: Rd = Rn ^ shifter_operand */
16263 name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
16264 case BITS4(1,1,1,0): /* BIC: Rd = Rn & ~shifter_operand */
16265 name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
16266 rd_eq_rn_op_SO: {
16267 Bool isRSB = False;
16268 Bool isBIC = False;
16269 switch (INSN(24,21)) {
16270 case BITS4(0,0,1,1):
16271 vassert(op == Iop_Sub32); isRSB = True; break;
16272 case BITS4(1,1,1,0):
16273 vassert(op == Iop_And32); isBIC = True; break;
16274 default:
16275 break;
16277 rNt = newTemp(Ity_I32);
16278 assign(rNt, getIRegA(rN));
16279 ok = mk_shifter_operand(
16280 INSN(25,25), INSN(11,0),
16281 &shop, bitS ? &shco : NULL, dis_buf
16283 if (!ok)
16284 break;
16285 res = newTemp(Ity_I32);
16286 // compute the main result
16287 if (isRSB) {
16288 // reverse-subtract: shifter_operand - Rn
16289 vassert(op == Iop_Sub32);
16290 assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
16291 } else if (isBIC) {
16292 // andn: shifter_operand & ~Rn
16293 vassert(op == Iop_And32);
16294 assign(res, binop(op, mkexpr(rNt),
16295 unop(Iop_Not32, mkexpr(shop))) );
16296 } else {
16297 // normal: Rn op shifter_operand
16298 assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
16300 // but don't commit it until after we've finished
16301 // all necessary reads from the guest state
16302 if (bitS
16303 && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
16304 oldV = newTemp(Ity_I32);
16305 assign( oldV, mk_armg_calculate_flag_v() );
16307 // can't safely read guest state after here
16308 // now safe to put the main result
16309 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
16310 // XXXX!! not safe to read any guest state after
16311 // this point (I think the code below doesn't do that).
16312 if (!bitS)
16313 vassert(shco == IRTemp_INVALID);
16314 /* Update the flags thunk if necessary */
16315 if (bitS) {
16316 vassert(shco != IRTemp_INVALID);
16317 switch (op) {
16318 case Iop_Add32:
16319 setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
16320 break;
16321 case Iop_Sub32:
16322 if (isRSB) {
16323 setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
16324 } else {
16325 setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
16327 break;
16328 case Iop_And32: /* BIC and AND set the flags the same */
16329 case Iop_Or32:
16330 case Iop_Xor32:
16331 // oldV has been read just above
16332 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16333 res, shco, oldV, condT );
16334 break;
16335 default:
16336 vassert(0);
16339 DIP("%s%s%s r%u, r%u, %s\n",
16340 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
16341 goto decode_success;
16344 /* --------- MOV, MVN --------- */
16345 case BITS4(1,1,0,1): /* MOV: Rd = shifter_operand */
16346 case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
16347 Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
16348 IRTemp jk = Ijk_Boring;
16349 if (rN != 0)
16350 break; /* rN must be zero */
16351 ok = mk_shifter_operand(
16352 INSN(25,25), INSN(11,0),
16353 &shop, bitS ? &shco : NULL, dis_buf
16355 if (!ok)
16356 break;
16357 res = newTemp(Ity_I32);
16358 assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
16359 : mkexpr(shop) );
16360 if (bitS) {
16361 vassert(shco != IRTemp_INVALID);
16362 oldV = newTemp(Ity_I32);
16363 assign( oldV, mk_armg_calculate_flag_v() );
16364 } else {
16365 vassert(shco == IRTemp_INVALID);
16367 /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
16368 return for purposes of branch prediction. */
16369 if (!isMVN && INSN(11,0) == 14) {
16370 jk = Ijk_Ret;
16372 // can't safely read guest state after here
16373 putIRegA( rD, mkexpr(res), condT, jk );
16374 /* Update the flags thunk if necessary */
16375 if (bitS) {
16376 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16377 res, shco, oldV, condT );
16379 DIP("%s%s%s r%u, %s\n",
16380 isMVN ? "mvn" : "mov",
16381 nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
16382 goto decode_success;
16385 /* --------- CMP --------- */
16386 case BITS4(1,0,1,0): /* CMP: (void) Rn - shifter_operand */
16387 case BITS4(1,0,1,1): { /* CMN: (void) Rn + shifter_operand */
16388 Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
16389 if (rD != 0)
16390 break; /* rD must be zero */
16391 if (bitS == 0)
16392 break; /* if S (bit 20) is not set, it's not CMP/CMN */
16393 rNt = newTemp(Ity_I32);
16394 assign(rNt, getIRegA(rN));
16395 ok = mk_shifter_operand(
16396 INSN(25,25), INSN(11,0),
16397 &shop, NULL, dis_buf
16399 if (!ok)
16400 break;
16401 // can't safely read guest state after here
16402 /* Update the flags thunk. */
16403 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
16404 rNt, shop, condT );
16405 DIP("%s%s r%u, %s\n",
16406 isCMN ? "cmn" : "cmp",
16407 nCC(INSN_COND), rN, dis_buf );
16408 goto decode_success;
16411 /* --------- TST --------- */
16412 case BITS4(1,0,0,0): /* TST: (void) Rn & shifter_operand */
16413 case BITS4(1,0,0,1): { /* TEQ: (void) Rn ^ shifter_operand */
16414 Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
16415 if (rD != 0)
16416 break; /* rD must be zero */
16417 if (bitS == 0)
16418 break; /* if S (bit 20) is not set, it's not TST/TEQ */
16419 rNt = newTemp(Ity_I32);
16420 assign(rNt, getIRegA(rN));
16421 ok = mk_shifter_operand(
16422 INSN(25,25), INSN(11,0),
16423 &shop, &shco, dis_buf
16425 if (!ok)
16426 break;
16427 /* Update the flags thunk. */
16428 res = newTemp(Ity_I32);
16429 assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
16430 mkexpr(rNt), mkexpr(shop)) );
16431 oldV = newTemp(Ity_I32);
16432 assign( oldV, mk_armg_calculate_flag_v() );
16433 // can't safely read guest state after here
16434 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16435 res, shco, oldV, condT );
16436 DIP("%s%s r%u, %s\n",
16437 isTEQ ? "teq" : "tst",
16438 nCC(INSN_COND), rN, dis_buf );
16439 goto decode_success;
16442 /* --------- ADC, SBC, RSC --------- */
16443 case BITS4(0,1,0,1): /* ADC: Rd = Rn + shifter_operand + oldC */
16444 name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
16445 case BITS4(0,1,1,0): /* SBC: Rd = Rn - shifter_operand - (oldC ^ 1) */
16446 name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
16447 case BITS4(0,1,1,1): /* RSC: Rd = shifter_operand - Rn - (oldC ^ 1) */
16448 name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
16449 rd_eq_rn_op_SO_op_oldC: {
16450 // FIXME: shco isn't used for anything. Get rid of it.
16451 rNt = newTemp(Ity_I32);
16452 assign(rNt, getIRegA(rN));
16453 ok = mk_shifter_operand(
16454 INSN(25,25), INSN(11,0),
16455 &shop, bitS ? &shco : NULL, dis_buf
16457 if (!ok)
16458 break;
16459 oldC = newTemp(Ity_I32);
16460 assign( oldC, mk_armg_calculate_flag_c() );
16461 res = newTemp(Ity_I32);
16462 // compute the main result
16463 switch (INSN(24,21)) {
16464 case BITS4(0,1,0,1): /* ADC */
16465 assign(res,
16466 binop(Iop_Add32,
16467 binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
16468 mkexpr(oldC) ));
16469 break;
16470 case BITS4(0,1,1,0): /* SBC */
16471 assign(res,
16472 binop(Iop_Sub32,
16473 binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
16474 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16475 break;
16476 case BITS4(0,1,1,1): /* RSC */
16477 assign(res,
16478 binop(Iop_Sub32,
16479 binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
16480 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16481 break;
16482 default:
16483 vassert(0);
16485 // but don't commit it until after we've finished
16486 // all necessary reads from the guest state
16487 // now safe to put the main result
16488 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
16489 // XXXX!! not safe to read any guest state after
16490 // this point (I think the code below doesn't do that).
16491 if (!bitS)
16492 vassert(shco == IRTemp_INVALID);
16493 /* Update the flags thunk if necessary */
16494 if (bitS) {
16495 vassert(shco != IRTemp_INVALID);
16496 switch (INSN(24,21)) {
16497 case BITS4(0,1,0,1): /* ADC */
16498 setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
16499 rNt, shop, oldC, condT );
16500 break;
16501 case BITS4(0,1,1,0): /* SBC */
16502 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16503 rNt, shop, oldC, condT );
16504 break;
16505 case BITS4(0,1,1,1): /* RSC */
16506 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16507 shop, rNt, oldC, condT );
16508 break;
16509 default:
16510 vassert(0);
16513 DIP("%s%s%s r%u, r%u, %s\n",
16514 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
16515 goto decode_success;
16518 default:
16519 vassert(0);
16521 } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
16523 /* --------------------- Load/store (ubyte & word) -------- */
16524 // LDR STR LDRB STRB
16525 /* 31 27 23 19 15 11 6 4 3 # highest bit
16526 28 24 20 16 12
16527 A5-20 1 | 16 cond 0101 UB0L Rn Rd imm12
16528 A5-22 1 | 32 cond 0111 UBOL Rn Rd imm5 sh2 0 Rm
16529 A5-24 2 | 16 cond 0101 UB1L Rn Rd imm12
16530 A5-26 2 | 32 cond 0111 UB1L Rn Rd imm5 sh2 0 Rm
16531 A5-28 3 | 16 cond 0100 UB0L Rn Rd imm12
16532 A5-32 3 | 32 cond 0110 UB0L Rn Rd imm5 sh2 0 Rm
16534 /* case coding:
16535 1 at-ea (access at ea)
16536 2 at-ea-then-upd (access at ea, then Rn = ea)
16537 3 at-Rn-then-upd (access at Rn, then Rn = ea)
16538 ea coding
16539 16 Rn +/- imm12
16540 32 Rn +/- Rm sh2 imm5
16542 /* Quickly skip over all of this for hopefully most instructions */
16543 if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
16544 goto after_load_store_ubyte_or_word;
16546 summary = 0;
16548 /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
16549 summary = 1 | 16;
16551 else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
16552 && INSN(4,4) == 0) {
16553 summary = 1 | 32;
16555 else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
16556 summary = 2 | 16;
16558 else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
16559 && INSN(4,4) == 0) {
16560 summary = 2 | 32;
16562 else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
16563 summary = 3 | 16;
16565 else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
16566 && INSN(4,4) == 0) {
16567 summary = 3 | 32;
16569 else goto after_load_store_ubyte_or_word;
16571 { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16572 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16573 UInt rM = (insn >> 0) & 0xF; /* 3:0 */
16574 UInt bU = (insn >> 23) & 1; /* 23 */
16575 UInt bB = (insn >> 22) & 1; /* 22 */
16576 UInt bL = (insn >> 20) & 1; /* 20 */
16577 UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
16578 UInt imm5 = (insn >> 7) & 0x1F; /* 11:7 */
16579 UInt sh2 = (insn >> 5) & 3; /* 6:5 */
16581 /* Skip some invalid cases, which would lead to two competing
16582 updates to the same register, or which are otherwise
16583 disallowed by the spec. */
16584 switch (summary) {
16585 case 1 | 16:
16586 break;
16587 case 1 | 32:
16588 if (rM == 15) goto after_load_store_ubyte_or_word;
16589 break;
16590 case 2 | 16: case 3 | 16:
16591 if (rN == 15) goto after_load_store_ubyte_or_word;
16592 if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
16593 break;
16594 case 2 | 32: case 3 | 32:
16595 if (rM == 15) goto after_load_store_ubyte_or_word;
16596 if (rN == 15) goto after_load_store_ubyte_or_word;
16597 if (rN == rM) goto after_load_store_ubyte_or_word;
16598 if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
16599 break;
16600 default:
16601 vassert(0);
16604 /* compute the effective address. Bind it to a tmp since we
16605 may need to use it twice. */
16606 IRExpr* eaE = NULL;
16607 switch (summary & 0xF0) {
16608 case 16:
16609 eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
16610 break;
16611 case 32:
16612 eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
16613 dis_buf );
16614 break;
16616 vassert(eaE);
16617 IRTemp eaT = newTemp(Ity_I32);
16618 assign(eaT, eaE);
16620 /* get the old Rn value */
16621 IRTemp rnT = newTemp(Ity_I32);
16622 assign(rnT, getIRegA(rN));
16624 /* decide on the transfer address */
16625 IRTemp taT = IRTemp_INVALID;
16626 switch (summary & 0x0F) {
16627 case 1: case 2: taT = eaT; break;
16628 case 3: taT = rnT; break;
16630 vassert(taT != IRTemp_INVALID);
16632 if (bL == 0) {
16633 /* Store. If necessary, update the base register before the
16634 store itself, so that the common idiom of "str rX, [sp,
16635 #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
16636 rX") doesn't cause Memcheck to complain that the access is
16637 below the stack pointer. Also, not updating sp before the
16638 store confuses Valgrind's dynamic stack-extending logic. So
16639 do it before the store. Hence we need to snarf the store
16640 data before doing the basereg update. */
16642 /* get hold of the data to be stored */
16643 IRTemp rDt = newTemp(Ity_I32);
16644 assign(rDt, getIRegA(rD));
16646 /* Update Rn if necessary. */
16647 switch (summary & 0x0F) {
16648 case 2: case 3:
16649 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16650 break;
16653 /* generate the transfer */
16654 if (bB == 0) { // word store
16655 storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
16656 } else { // byte store
16657 vassert(bB == 1);
16658 storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
16661 } else {
16662 /* Load */
16663 vassert(bL == 1);
16665 /* generate the transfer */
16666 if (bB == 0) { // word load
16667 IRTemp jk = Ijk_Boring;
16668 /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
16669 base register and PC as the destination register is a return for
16670 purposes of branch prediction.
16671 The ARM ARM Sec. C9.10.1 further specifies that it must use a
16672 post-increment by immediate addressing mode to be counted in
16673 event 0x0E (Procedure return).*/
16674 if (rN == 13 && summary == (3 | 16) && bB == 0) {
16675 jk = Ijk_Ret;
16677 IRTemp tD = newTemp(Ity_I32);
16678 loadGuardedLE( tD, ILGop_Ident32,
16679 mkexpr(taT), llGetIReg(rD), condT );
16680 /* "rD == 15 ? condT : IRTemp_INVALID": simply
16681 IRTemp_INVALID would be correct in all cases here, and
16682 for the non-r15 case it generates better code, by
16683 avoiding two tests of the cond (since it is already
16684 tested by loadGuardedLE). However, the logic at the end
16685 of this function, that deals with writes to r15, has an
16686 optimisation which depends on seeing whether or not the
16687 write is conditional. Hence in this particular case we
16688 let it "see" the guard condition. */
16689 putIRegA( rD, mkexpr(tD),
16690 rD == 15 ? condT : IRTemp_INVALID, jk );
16691 } else { // byte load
16692 vassert(bB == 1);
16693 IRTemp tD = newTemp(Ity_I32);
16694 loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
16695 /* No point in similar 3rd arg complexity here, since we
16696 can't sanely write anything to r15 like this. */
16697 putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
16700 /* Update Rn if necessary. */
16701 switch (summary & 0x0F) {
16702 case 2: case 3:
16703 // should be assured by logic above:
16704 if (bL == 1)
16705 vassert(rD != rN); /* since we just wrote rD */
16706 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16707 break;
16711 switch (summary & 0x0F) {
16712 case 1: DIP("%sr%s%s r%u, %s\n",
16713 bL == 0 ? "st" : "ld",
16714 bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16715 break;
16716 case 2: DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16717 bL == 0 ? "st" : "ld",
16718 bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16719 break;
16720 case 3: DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16721 bL == 0 ? "st" : "ld",
16722 bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16723 break;
16724 default: vassert(0);
16727 /* XXX deal with alignment constraints */
16729 goto decode_success;
16731 /* Complications:
16733 For all loads: if the Amode specifies base register
16734 writeback, and the same register is specified for Rd and Rn,
16735 the results are UNPREDICTABLE.
16737 For all loads and stores: if R15 is written, branch to
16738 that address afterwards.
16740 STRB: straightforward
16741 LDRB: loaded data is zero extended
16742 STR: lowest 2 bits of address are ignored
16743 LDR: if the lowest 2 bits of the address are nonzero
16744 then the loaded value is rotated right by 8 * the lowest 2 bits
16748 after_load_store_ubyte_or_word:
16750 /* --------------------- Load/store (sbyte & hword) -------- */
16751 // LDRH LDRSH STRH LDRSB
16752 /* 31 27 23 19 15 11 7 3 # highest bit
16753 28 24 20 16 12 8 4 0
16754 A5-36 1 | 16 cond 0001 U10L Rn Rd im4h 1SH1 im4l
16755 A5-38 1 | 32 cond 0001 U00L Rn Rd 0000 1SH1 Rm
16756 A5-40 2 | 16 cond 0001 U11L Rn Rd im4h 1SH1 im4l
16757 A5-42 2 | 32 cond 0001 U01L Rn Rd 0000 1SH1 Rm
16758 A5-44 3 | 16 cond 0000 U10L Rn Rd im4h 1SH1 im4l
16759 A5-46 3 | 32 cond 0000 U00L Rn Rd 0000 1SH1 Rm
16761 /* case coding:
16762 1 at-ea (access at ea)
16763 2 at-ea-then-upd (access at ea, then Rn = ea)
16764 3 at-Rn-then-upd (access at Rn, then Rn = ea)
16765 ea coding
16766 16 Rn +/- imm8
16767 32 Rn +/- Rm
16769 /* Quickly skip over all of this for hopefully most instructions */
16770 if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
16771 goto after_load_store_sbyte_or_hword;
16773 /* Check the "1SH1" thing. */
16774 if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
16775 goto after_load_store_sbyte_or_hword;
16777 summary = 0;
16779 /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
16780 summary = 1 | 16;
16782 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
16783 summary = 1 | 32;
16785 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
16786 summary = 2 | 16;
16788 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
16789 summary = 2 | 32;
16791 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
16792 summary = 3 | 16;
16794 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
16795 summary = 3 | 32;
16797 else goto after_load_store_sbyte_or_hword;
16799 { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16800 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16801 UInt rM = (insn >> 0) & 0xF; /* 3:0 */
16802 UInt bU = (insn >> 23) & 1; /* 23 U=1 offset+, U=0 offset- */
16803 UInt bL = (insn >> 20) & 1; /* 20 L=1 load, L=0 store */
16804 UInt bH = (insn >> 5) & 1; /* H=1 halfword, H=0 byte */
16805 UInt bS = (insn >> 6) & 1; /* S=1 signed, S=0 unsigned */
16806 UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
16808 /* Skip combinations that are either meaningless or already
16809 handled by main word-or-unsigned-byte load-store
16810 instructions. */
16811 if (bS == 0 && bH == 0) /* "unsigned byte" */
16812 goto after_load_store_sbyte_or_hword;
16813 if (bS == 1 && bL == 0) /* "signed store" */
16814 goto after_load_store_sbyte_or_hword;
16816 /* Require 11:8 == 0 for Rn +/- Rm cases */
16817 if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
16818 goto after_load_store_sbyte_or_hword;
16820 /* Skip some invalid cases, which would lead to two competing
16821 updates to the same register, or which are otherwise
16822 disallowed by the spec. */
16823 switch (summary) {
16824 case 1 | 16:
16825 break;
16826 case 1 | 32:
16827 if (rM == 15) goto after_load_store_sbyte_or_hword;
16828 break;
16829 case 2 | 16: case 3 | 16:
16830 if (rN == 15) goto after_load_store_sbyte_or_hword;
16831 if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
16832 break;
16833 case 2 | 32: case 3 | 32:
16834 if (rM == 15) goto after_load_store_sbyte_or_hword;
16835 if (rN == 15) goto after_load_store_sbyte_or_hword;
16836 if (rN == rM) goto after_load_store_sbyte_or_hword;
16837 if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
16838 break;
16839 default:
16840 vassert(0);
16843 /* If this is a branch, make it unconditional at this point.
16844 Doing conditional branches in-line is too complex (for now).
16845 Note that you'd have to be insane to use any of these loads to
16846 do a branch, since they only load 16 bits at most, but we
16847 handle it just in case. */
16848 if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
16849 // go uncond
16850 mk_skip_over_A32_if_cond_is_false( condT );
16851 condT = IRTemp_INVALID;
16852 // now uncond
16855 /* compute the effective address. Bind it to a tmp since we
16856 may need to use it twice. */
16857 IRExpr* eaE = NULL;
16858 switch (summary & 0xF0) {
16859 case 16:
16860 eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
16861 break;
16862 case 32:
16863 eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
16864 break;
16866 vassert(eaE);
16867 IRTemp eaT = newTemp(Ity_I32);
16868 assign(eaT, eaE);
16870 /* get the old Rn value */
16871 IRTemp rnT = newTemp(Ity_I32);
16872 assign(rnT, getIRegA(rN));
16874 /* decide on the transfer address */
16875 IRTemp taT = IRTemp_INVALID;
16876 switch (summary & 0x0F) {
16877 case 1: case 2: taT = eaT; break;
16878 case 3: taT = rnT; break;
16880 vassert(taT != IRTemp_INVALID);
16882 /* ll previous value of rD, for dealing with conditional loads */
16883 IRTemp llOldRd = newTemp(Ity_I32);
16884 assign(llOldRd, llGetIReg(rD));
16886 /* halfword store H 1 L 0 S 0
16887 uhalf load H 1 L 1 S 0
16888 shalf load H 1 L 1 S 1
16889 sbyte load H 0 L 1 S 1
16891 const HChar* name = NULL;
16892 /* generate the transfer */
16893 /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
16894 storeGuardedLE( mkexpr(taT),
16895 unop(Iop_32to16, getIRegA(rD)), condT );
16896 name = "strh";
16898 else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
16899 IRTemp newRd = newTemp(Ity_I32);
16900 loadGuardedLE( newRd, ILGop_16Uto32,
16901 mkexpr(taT), mkexpr(llOldRd), condT );
16902 putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16903 name = "ldrh";
16905 else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
16906 IRTemp newRd = newTemp(Ity_I32);
16907 loadGuardedLE( newRd, ILGop_16Sto32,
16908 mkexpr(taT), mkexpr(llOldRd), condT );
16909 putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16910 name = "ldrsh";
16912 else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
16913 IRTemp newRd = newTemp(Ity_I32);
16914 loadGuardedLE( newRd, ILGop_8Sto32,
16915 mkexpr(taT), mkexpr(llOldRd), condT );
16916 putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16917 name = "ldrsb";
16919 else
16920 vassert(0); // should be assured by logic above
16922 /* Update Rn if necessary. */
16923 switch (summary & 0x0F) {
16924 case 2: case 3:
16925 // should be assured by logic above:
16926 if (bL == 1)
16927 vassert(rD != rN); /* since we just wrote rD */
16928 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16929 break;
16932 switch (summary & 0x0F) {
16933 case 1: DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
16934 break;
16935 case 2: DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16936 name, nCC(INSN_COND), rD, dis_buf);
16937 break;
16938 case 3: DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16939 name, nCC(INSN_COND), rD, dis_buf);
16940 break;
16941 default: vassert(0);
16944 /* XXX deal with alignment constraints */
16946 goto decode_success;
16948 /* Complications:
16950 For all loads: if the Amode specifies base register
16951 writeback, and the same register is specified for Rd and Rn,
16952 the results are UNPREDICTABLE.
16954 For all loads and stores: if R15 is written, branch to
16955 that address afterwards.
16957 Misaligned halfword stores => Unpredictable
16958 Misaligned halfword loads => Unpredictable
16962 after_load_store_sbyte_or_hword:
16964 /* --------------------- Load/store multiple -------------- */
16965 // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
16966 // Remarkably complex and difficult to get right
16967 // match 27:20 as 100XX0WL
16968 if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
16969 // A5-50 LD/STMIA cond 1000 10WL Rn RegList
16970 // A5-51 LD/STMIB cond 1001 10WL Rn RegList
16971 // A5-53 LD/STMDA cond 1000 00WL Rn RegList
16972 // A5-53 LD/STMDB cond 1001 00WL Rn RegList
16973 // 28 24 20 16 0
16975 UInt bINC = (insn >> 23) & 1;
16976 UInt bBEFORE = (insn >> 24) & 1;
16978 UInt bL = (insn >> 20) & 1; /* load=1, store=0 */
16979 UInt bW = (insn >> 21) & 1; /* Rn wback=1, no wback=0 */
16980 UInt rN = (insn >> 16) & 0xF;
16981 UInt regList = insn & 0xFFFF;
16982 /* Skip some invalid cases, which would lead to two competing
16983 updates to the same register, or which are otherwise
16984 disallowed by the spec. Note the test above has required
16985 that S == 0, since that looks like a kernel-mode only thing.
16986 Done by forcing the real pattern, viz 100XXSWL to actually be
16987 100XX0WL. */
16988 if (rN == 15) goto after_load_store_multiple;
16989 // reglist can't be empty
16990 if (regList == 0) goto after_load_store_multiple;
16991 // if requested to writeback Rn, and this is a load instruction,
16992 // then Rn can't appear in RegList, since we'd have two competing
16993 // new values for Rn. We do however accept this case for store
16994 // instructions.
16995 if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
16996 goto after_load_store_multiple;
16998 /* Now, we can't do a conditional load or store, since that very
16999 likely will generate an exception. So we have to take a side
17000 exit at this point if the condition is false. */
17001 if (condT != IRTemp_INVALID) {
17002 mk_skip_over_A32_if_cond_is_false( condT );
17003 condT = IRTemp_INVALID;
17006 /* Ok, now we're unconditional. Generate the IR. */
17007 mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
17009 DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
17010 bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
17011 nCC(INSN_COND),
17012 rN, bW ? "!" : "", regList);
17014 goto decode_success;
17017 after_load_store_multiple:
17019 /* --------------------- Control flow --------------------- */
17020 // B, BL (Branch, or Branch-and-Link, to immediate offset)
17022 if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
17023 UInt link = (insn >> 24) & 1;
17024 UInt uimm24 = insn & ((1<<24)-1); uimm24 <<= 8;
17025 Int simm24 = (Int)uimm24; simm24 >>= 8;
17026 UInt dst = guest_R15_curr_instr_notENC + 8 + (((UInt)simm24) << 2);
17027 IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
17028 if (link) {
17029 putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
17030 condT, Ijk_Boring);
17032 if (condT == IRTemp_INVALID) {
17033 /* Unconditional transfer to 'dst'. Terminate the SB at this point. */
17034 llPutIReg(15, mkU32(dst));
17035 dres.jk_StopHere = jk;
17036 dres.whatNext = Dis_StopHere;
17037 DIP("b%s 0x%x\n", link ? "l" : "", dst);
17038 } else {
17039 /* Conditional transfer to 'dst'. Terminate the SB at this point. */
17040 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
17041 jk, IRConst_U32(dst), OFFB_R15T ));
17042 llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
17043 dres.jk_StopHere = Ijk_Boring;
17044 dres.whatNext = Dis_StopHere;
17045 DIP("b%s%s 0x%x\n", link ? "l" : "", nCC(INSN_COND), dst);
17047 goto decode_success;
17050 // B, BL (Branch, or Branch-and-Link, to a register)
17051 // NB: interworking branch
17052 if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
17053 && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
17054 && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
17055 || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
17056 IRTemp dst = newTemp(Ity_I32);
17057 UInt link = (INSN(11,4) >> 1) & 1;
17058 UInt rM = INSN(3,0);
17059 // we don't decode the case (link && rM == 15), as that's
17060 // Unpredictable.
17061 if (!(link && rM == 15)) {
17062 if (condT != IRTemp_INVALID) {
17063 mk_skip_over_A32_if_cond_is_false( condT );
17065 // rM contains an interworking address exactly as we require
17066 // (with continuation CPSR.T in bit 0), so we can use it
17067 // as-is, with no masking.
17068 assign( dst, getIRegA(rM) );
17069 if (link) {
17070 putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
17071 IRTemp_INVALID/*because AL*/, Ijk_Boring );
17073 llPutIReg(15, mkexpr(dst));
17074 dres.jk_StopHere = link ? Ijk_Call
17075 : (rM == 14 ? Ijk_Ret : Ijk_Boring);
17076 dres.whatNext = Dis_StopHere;
17077 if (condT == IRTemp_INVALID) {
17078 DIP("b%sx r%u\n", link ? "l" : "", rM);
17079 } else {
17080 DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
17082 goto decode_success;
17084 /* else: (link && rM == 15): just fall through */
17087 /* --- NB: ARM interworking branches are in NV space, hence
17088 are handled elsewhere by decode_NV_instruction_ARMv7_and_below.
17092 /* --------------------- Clz --------------------- */
17093 // CLZ
17094 if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
17095 && INSN(19,16) == BITS4(1,1,1,1)
17096 && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
17097 UInt rD = INSN(15,12);
17098 UInt rM = INSN(3,0);
17099 IRTemp arg = newTemp(Ity_I32);
17100 IRTemp res = newTemp(Ity_I32);
17101 assign(arg, getIRegA(rM));
17102 assign(res, IRExpr_ITE(
17103 binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
17104 mkU32(32),
17105 unop(Iop_Clz32, mkexpr(arg))
17107 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17108 DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
17109 goto decode_success;
17112 /* --------------------- Mul etc --------------------- */
17113 // MUL
17114 if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
17115 && INSN(15,12) == BITS4(0,0,0,0)
17116 && INSN(7,4) == BITS4(1,0,0,1)) {
17117 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17118 UInt rD = INSN(19,16);
17119 UInt rS = INSN(11,8);
17120 UInt rM = INSN(3,0);
17121 if (rD == 15 || rM == 15 || rS == 15) {
17122 /* Unpredictable; don't decode; fall through */
17123 } else {
17124 IRTemp argL = newTemp(Ity_I32);
17125 IRTemp argR = newTemp(Ity_I32);
17126 IRTemp res = newTemp(Ity_I32);
17127 IRTemp oldC = IRTemp_INVALID;
17128 IRTemp oldV = IRTemp_INVALID;
17129 assign( argL, getIRegA(rM));
17130 assign( argR, getIRegA(rS));
17131 assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
17132 if (bitS) {
17133 oldC = newTemp(Ity_I32);
17134 assign(oldC, mk_armg_calculate_flag_c());
17135 oldV = newTemp(Ity_I32);
17136 assign(oldV, mk_armg_calculate_flag_v());
17138 // now update guest state
17139 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
17140 if (bitS) {
17141 IRTemp pair = newTemp(Ity_I32);
17142 assign( pair, binop(Iop_Or32,
17143 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17144 mkexpr(oldV)) );
17145 setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
17147 DIP("mul%c%s r%u, r%u, r%u\n",
17148 bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
17149 goto decode_success;
17151 /* fall through */
17154 /* --------------------- Integer Divides --------------------- */
17155 // SDIV
17156 if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
17157 && INSN(15,12) == BITS4(1,1,1,1)
17158 && INSN(7,4) == BITS4(0,0,0,1)) {
17159 UInt rD = INSN(19,16);
17160 UInt rM = INSN(11,8);
17161 UInt rN = INSN(3,0);
17162 if (rD == 15 || rM == 15 || rN == 15) {
17163 /* Unpredictable; don't decode; fall through */
17164 } else {
17165 IRTemp res = newTemp(Ity_I32);
17166 IRTemp argL = newTemp(Ity_I32);
17167 IRTemp argR = newTemp(Ity_I32);
17168 assign(argL, getIRegA(rN));
17169 assign(argR, getIRegA(rM));
17170 assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
17171 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17172 DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
17173 goto decode_success;
17177 // UDIV
17178 if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
17179 && INSN(15,12) == BITS4(1,1,1,1)
17180 && INSN(7,4) == BITS4(0,0,0,1)) {
17181 UInt rD = INSN(19,16);
17182 UInt rM = INSN(11,8);
17183 UInt rN = INSN(3,0);
17184 if (rD == 15 || rM == 15 || rN == 15) {
17185 /* Unpredictable; don't decode; fall through */
17186 } else {
17187 IRTemp res = newTemp(Ity_I32);
17188 IRTemp argL = newTemp(Ity_I32);
17189 IRTemp argR = newTemp(Ity_I32);
17190 assign(argL, getIRegA(rN));
17191 assign(argR, getIRegA(rM));
17192 assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
17193 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17194 DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
17195 goto decode_success;
17199 // MLA, MLS
17200 if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17201 && INSN(7,4) == BITS4(1,0,0,1)) {
17202 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17203 UInt isMLS = (insn >> 22) & 1; /* 22:22 */
17204 UInt rD = INSN(19,16);
17205 UInt rN = INSN(15,12);
17206 UInt rS = INSN(11,8);
17207 UInt rM = INSN(3,0);
17208 if (bitS == 1 && isMLS == 1) {
17209 /* This isn't allowed (MLS that sets flags). don't decode;
17210 fall through */
17212 else
17213 if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
17214 /* Unpredictable; don't decode; fall through */
17215 } else {
17216 IRTemp argL = newTemp(Ity_I32);
17217 IRTemp argR = newTemp(Ity_I32);
17218 IRTemp argP = newTemp(Ity_I32);
17219 IRTemp res = newTemp(Ity_I32);
17220 IRTemp oldC = IRTemp_INVALID;
17221 IRTemp oldV = IRTemp_INVALID;
17222 assign( argL, getIRegA(rM));
17223 assign( argR, getIRegA(rS));
17224 assign( argP, getIRegA(rN));
17225 assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
17226 mkexpr(argP),
17227 binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
17228 if (bitS) {
17229 vassert(!isMLS); // guaranteed above
17230 oldC = newTemp(Ity_I32);
17231 assign(oldC, mk_armg_calculate_flag_c());
17232 oldV = newTemp(Ity_I32);
17233 assign(oldV, mk_armg_calculate_flag_v());
17235 // now update guest state
17236 putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
17237 if (bitS) {
17238 IRTemp pair = newTemp(Ity_I32);
17239 assign( pair, binop(Iop_Or32,
17240 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17241 mkexpr(oldV)) );
17242 setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
17244 DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
17245 isMLS ? 's' : 'a', bitS ? 's' : ' ',
17246 nCC(INSN_COND), rD, rM, rS, rN);
17247 goto decode_success;
17249 /* fall through */
17252 // SMULL, UMULL
17253 if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17254 && INSN(7,4) == BITS4(1,0,0,1)) {
17255 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17256 UInt rDhi = INSN(19,16);
17257 UInt rDlo = INSN(15,12);
17258 UInt rS = INSN(11,8);
17259 UInt rM = INSN(3,0);
17260 UInt isS = (INSN(27,20) >> 2) & 1; /* 22:22 */
17261 if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo) {
17262 /* Unpredictable; don't decode; fall through */
17263 } else {
17264 IRTemp argL = newTemp(Ity_I32);
17265 IRTemp argR = newTemp(Ity_I32);
17266 IRTemp res = newTemp(Ity_I64);
17267 IRTemp resHi = newTemp(Ity_I32);
17268 IRTemp resLo = newTemp(Ity_I32);
17269 IRTemp oldC = IRTemp_INVALID;
17270 IRTemp oldV = IRTemp_INVALID;
17271 IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17272 assign( argL, getIRegA(rM));
17273 assign( argR, getIRegA(rS));
17274 assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
17275 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17276 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17277 if (bitS) {
17278 oldC = newTemp(Ity_I32);
17279 assign(oldC, mk_armg_calculate_flag_c());
17280 oldV = newTemp(Ity_I32);
17281 assign(oldV, mk_armg_calculate_flag_v());
17283 // now update guest state
17284 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17285 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17286 if (bitS) {
17287 IRTemp pair = newTemp(Ity_I32);
17288 assign( pair, binop(Iop_Or32,
17289 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17290 mkexpr(oldV)) );
17291 setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
17293 DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
17294 isS ? 's' : 'u', bitS ? 's' : ' ',
17295 nCC(INSN_COND), rDlo, rDhi, rM, rS);
17296 goto decode_success;
17298 /* fall through */
17301 // SMLAL, UMLAL
17302 if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17303 && INSN(7,4) == BITS4(1,0,0,1)) {
17304 UInt bitS = (insn >> 20) & 1; /* 20:20 */
17305 UInt rDhi = INSN(19,16);
17306 UInt rDlo = INSN(15,12);
17307 UInt rS = INSN(11,8);
17308 UInt rM = INSN(3,0);
17309 UInt isS = (INSN(27,20) >> 2) & 1; /* 22:22 */
17310 if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo) {
17311 /* Unpredictable; don't decode; fall through */
17312 } else {
17313 IRTemp argL = newTemp(Ity_I32);
17314 IRTemp argR = newTemp(Ity_I32);
17315 IRTemp old = newTemp(Ity_I64);
17316 IRTemp res = newTemp(Ity_I64);
17317 IRTemp resHi = newTemp(Ity_I32);
17318 IRTemp resLo = newTemp(Ity_I32);
17319 IRTemp oldC = IRTemp_INVALID;
17320 IRTemp oldV = IRTemp_INVALID;
17321 IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17322 assign( argL, getIRegA(rM));
17323 assign( argR, getIRegA(rS));
17324 assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
17325 assign( res, binop(Iop_Add64,
17326 mkexpr(old),
17327 binop(mulOp, mkexpr(argL), mkexpr(argR))) );
17328 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17329 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17330 if (bitS) {
17331 oldC = newTemp(Ity_I32);
17332 assign(oldC, mk_armg_calculate_flag_c());
17333 oldV = newTemp(Ity_I32);
17334 assign(oldV, mk_armg_calculate_flag_v());
17336 // now update guest state
17337 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17338 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17339 if (bitS) {
17340 IRTemp pair = newTemp(Ity_I32);
17341 assign( pair, binop(Iop_Or32,
17342 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17343 mkexpr(oldV)) );
17344 setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
17346 DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
17347 isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
17348 rDlo, rDhi, rM, rS);
17349 goto decode_success;
17351 /* fall through */
17354 // UMAAL
17355 if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
17356 UInt rDhi = INSN(19,16);
17357 UInt rDlo = INSN(15,12);
17358 UInt rM = INSN(11,8);
17359 UInt rN = INSN(3,0);
17360 if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo) {
17361 /* Unpredictable; don't decode; fall through */
17362 } else {
17363 IRTemp argN = newTemp(Ity_I32);
17364 IRTemp argM = newTemp(Ity_I32);
17365 IRTemp argDhi = newTemp(Ity_I32);
17366 IRTemp argDlo = newTemp(Ity_I32);
17367 IRTemp res = newTemp(Ity_I64);
17368 IRTemp resHi = newTemp(Ity_I32);
17369 IRTemp resLo = newTemp(Ity_I32);
17370 assign( argN, getIRegA(rN) );
17371 assign( argM, getIRegA(rM) );
17372 assign( argDhi, getIRegA(rDhi) );
17373 assign( argDlo, getIRegA(rDlo) );
17374 assign( res,
17375 binop(Iop_Add64,
17376 binop(Iop_Add64,
17377 binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
17378 unop(Iop_32Uto64, mkexpr(argDhi))),
17379 unop(Iop_32Uto64, mkexpr(argDlo))) );
17380 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17381 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17382 // now update guest state
17383 putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17384 putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17385 DIP("umaal %s r%u, r%u, r%u, r%u\n",
17386 nCC(INSN_COND), rDlo, rDhi, rN, rM);
17387 goto decode_success;
17389 /* fall through */
17392 /* --------------------- Msr etc --------------------- */
17394 // MSR apsr, #imm
17395 if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
17396 && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
17397 UInt write_ge = INSN(18,18);
17398 UInt write_nzcvq = INSN(19,19);
17399 if (write_nzcvq || write_ge) {
17400 UInt imm = (INSN(11,0) >> 0) & 0xFF;
17401 UInt rot = 2 * ((INSN(11,0) >> 8) & 0xF);
17402 IRTemp immT = newTemp(Ity_I32);
17403 vassert(rot <= 30);
17404 imm = ROR32(imm, rot);
17405 assign(immT, mkU32(imm));
17406 desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
17407 DIP("msr%s cpsr%s%sf, #0x%08x\n", nCC(INSN_COND),
17408 write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
17409 goto decode_success;
17411 /* fall through */
17414 // MSR apsr, reg
17415 if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
17416 && INSN(17,12) == BITS6(0,0,1,1,1,1)
17417 && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
17418 UInt rN = INSN(3,0);
17419 UInt write_ge = INSN(18,18);
17420 UInt write_nzcvq = INSN(19,19);
17421 if (rN != 15 && (write_nzcvq || write_ge)) {
17422 IRTemp rNt = newTemp(Ity_I32);
17423 assign(rNt, getIRegA(rN));
17424 desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
17425 DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
17426 write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
17427 goto decode_success;
17429 /* fall through */
17432 // MRS rD, cpsr
17433 if ((insn & 0x0FFF0FFF) == 0x010F0000) {
17434 UInt rD = INSN(15,12);
17435 if (rD != 15) {
17436 IRTemp apsr = synthesise_APSR();
17437 putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
17438 DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
17439 goto decode_success;
17441 /* fall through */
17444 /* --------------------- Svc --------------------- */
17445 if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
17446 UInt imm24 = (insn >> 0) & 0xFFFFFF;
17447 if (imm24 == 0) {
17448 /* A syscall. We can't do this conditionally, hence: */
17449 if (condT != IRTemp_INVALID) {
17450 mk_skip_over_A32_if_cond_is_false( condT );
17452 // AL after here
17453 llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
17454 dres.jk_StopHere = Ijk_Sys_syscall;
17455 dres.whatNext = Dis_StopHere;
17456 DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
17457 goto decode_success;
17459 /* fall through */
17462 /* ------------------------ swp ------------------------ */
17464 // SWP, SWPB
17465 if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
17466 && BITS4(0,0,0,0) == INSN(11,8)
17467 && BITS4(1,0,0,1) == INSN(7,4)) {
17468 UInt rN = INSN(19,16);
17469 UInt rD = INSN(15,12);
17470 UInt rM = INSN(3,0);
17471 IRTemp tRn = newTemp(Ity_I32);
17472 IRTemp tNew = newTemp(Ity_I32);
17473 IRTemp tOld = IRTemp_INVALID;
17474 IRTemp tSC1 = newTemp(Ity_I1);
17475 UInt isB = (insn >> 22) & 1;
17477 if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
17478 /* undecodable; fall through */
17479 } else {
17480 /* make unconditional */
17481 if (condT != IRTemp_INVALID) {
17482 mk_skip_over_A32_if_cond_is_false( condT );
17483 condT = IRTemp_INVALID;
17485 /* Ok, now we're unconditional. Generate a LL-SC loop. */
17486 assign(tRn, getIRegA(rN));
17487 assign(tNew, getIRegA(rM));
17488 if (isB) {
17489 /* swpb */
17490 tOld = newTemp(Ity_I8);
17491 stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
17492 NULL/*=>isLL*/) );
17493 stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
17494 unop(Iop_32to8, mkexpr(tNew))) );
17495 } else {
17496 /* swp */
17497 tOld = newTemp(Ity_I32);
17498 stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
17499 NULL/*=>isLL*/) );
17500 stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
17501 mkexpr(tNew)) );
17503 stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
17504 /*Ijk_NoRedir*/Ijk_Boring,
17505 IRConst_U32(guest_R15_curr_instr_notENC),
17506 OFFB_R15T ));
17507 putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
17508 IRTemp_INVALID, Ijk_Boring);
17509 DIP("swp%s%s r%u, r%u, [r%u]\n",
17510 isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
17511 goto decode_success;
17513 /* fall through */
17516 /* ----------------------------------------------------------- */
17517 /* -- ARMv6 instructions -- */
17518 /* ----------------------------------------------------------- */
17520 /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
17522 // LDREXD, LDREX, LDREXH, LDREXB
17523 if (0x01900F9F == (insn & 0x0F900FFF)) {
17524 UInt rT = INSN(15,12);
17525 UInt rN = INSN(19,16);
17526 IRType ty = Ity_INVALID;
17527 IROp widen = Iop_INVALID;
17528 const HChar* nm = NULL;
17529 Bool valid = True;
17530 switch (INSN(22,21)) {
17531 case 0: nm = ""; ty = Ity_I32; break;
17532 case 1: nm = "d"; ty = Ity_I64; break;
17533 case 2: nm = "b"; ty = Ity_I8; widen = Iop_8Uto32; break;
17534 case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
17535 default: vassert(0);
17537 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
17538 if (rT == 15 || rN == 15)
17539 valid = False;
17540 } else {
17541 vassert(ty == Ity_I64);
17542 if ((rT & 1) == 1 || rT == 14 || rN == 15)
17543 valid = False;
17545 if (valid) {
17546 IRTemp res;
17547 /* make unconditional */
17548 if (condT != IRTemp_INVALID) {
17549 mk_skip_over_A32_if_cond_is_false( condT );
17550 condT = IRTemp_INVALID;
17552 /* Ok, now we're unconditional. Do the load. */
17553 res = newTemp(ty);
17554 // FIXME: assumes little-endian guest
17555 stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
17556 NULL/*this is a load*/) );
17557 if (ty == Ity_I64) {
17558 // FIXME: assumes little-endian guest
17559 putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
17560 IRTemp_INVALID, Ijk_Boring);
17561 putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
17562 IRTemp_INVALID, Ijk_Boring);
17563 DIP("ldrex%s%s r%u, r%u, [r%u]\n",
17564 nm, nCC(INSN_COND), rT+0, rT+1, rN);
17565 } else {
17566 putIRegA(rT, widen == Iop_INVALID
17567 ? mkexpr(res) : unop(widen, mkexpr(res)),
17568 IRTemp_INVALID, Ijk_Boring);
17569 DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
17571 goto decode_success;
17573 /* undecodable; fall through */
17576 // STREXD, STREX, STREXH, STREXB
17577 if (0x01800F90 == (insn & 0x0F900FF0)) {
17578 UInt rT = INSN(3,0);
17579 UInt rN = INSN(19,16);
17580 UInt rD = INSN(15,12);
17581 IRType ty = Ity_INVALID;
17582 IROp narrow = Iop_INVALID;
17583 const HChar* nm = NULL;
17584 Bool valid = True;
17585 switch (INSN(22,21)) {
17586 case 0: nm = ""; ty = Ity_I32; break;
17587 case 1: nm = "d"; ty = Ity_I64; break;
17588 case 2: nm = "b"; ty = Ity_I8; narrow = Iop_32to8; break;
17589 case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
17590 default: vassert(0);
17592 if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
17593 if (rD == 15 || rN == 15 || rT == 15
17594 || rD == rN || rD == rT)
17595 valid = False;
17596 } else {
17597 vassert(ty == Ity_I64);
17598 if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
17599 || rD == rN || rD == rT || rD == rT+1)
17600 valid = False;
17602 if (valid) {
17603 IRTemp resSC1, resSC32, data;
17604 /* make unconditional */
17605 if (condT != IRTemp_INVALID) {
17606 mk_skip_over_A32_if_cond_is_false( condT );
17607 condT = IRTemp_INVALID;
17609 /* Ok, now we're unconditional. Do the store. */
17610 data = newTemp(ty);
17611 assign(data,
17612 ty == Ity_I64
17613 // FIXME: assumes little-endian guest
17614 ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
17615 : narrow == Iop_INVALID
17616 ? getIRegA(rT)
17617 : unop(narrow, getIRegA(rT)));
17618 resSC1 = newTemp(Ity_I1);
17619 // FIXME: assumes little-endian guest
17620 stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
17622 /* Set rD to 1 on failure, 0 on success. Currently we have
17623 resSC1 == 0 on failure, 1 on success. */
17624 resSC32 = newTemp(Ity_I32);
17625 assign(resSC32,
17626 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
17628 putIRegA(rD, mkexpr(resSC32),
17629 IRTemp_INVALID, Ijk_Boring);
17630 if (ty == Ity_I64) {
17631 DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
17632 nm, nCC(INSN_COND), rD, rT, rT+1, rN);
17633 } else {
17634 DIP("strex%s%s r%u, r%u, [r%u]\n",
17635 nm, nCC(INSN_COND), rD, rT, rN);
17637 goto decode_success;
17639 /* fall through */
17642 /* --------------------- movw, movt --------------------- */
17643 if (0x03000000 == (insn & 0x0FF00000)
17644 || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
17645 UInt rD = INSN(15,12);
17646 UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
17647 UInt isT = (insn >> 22) & 1;
17648 if (rD == 15) {
17649 /* forget it */
17650 } else {
17651 if (isT) {
17652 putIRegA(rD,
17653 binop(Iop_Or32,
17654 binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
17655 mkU32(imm16 << 16)),
17656 condT, Ijk_Boring);
17657 DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
17658 goto decode_success;
17659 } else {
17660 putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
17661 DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
17662 goto decode_success;
17665 /* fall through */
17668 /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
17669 /* FIXME: this is an exact duplicate of the Thumb version. They
17670 should be commoned up. */
17671 if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
17672 && BITS4(1,1,1,1) == INSN(19,16)
17673 && BITS4(0,1,1,1) == INSN(7,4)
17674 && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
17675 UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
17676 if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
17677 Int rot = (INSN(11,8) >> 2) & 3;
17678 UInt rM = INSN(3,0);
17679 UInt rD = INSN(15,12);
17680 IRTemp srcT = newTemp(Ity_I32);
17681 IRTemp rotT = newTemp(Ity_I32);
17682 IRTemp dstT = newTemp(Ity_I32);
17683 const HChar* nm = "???";
17684 assign(srcT, getIRegA(rM));
17685 assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
17686 switch (subopc) {
17687 case BITS4(0,1,1,0): // UXTB
17688 assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
17689 nm = "uxtb";
17690 break;
17691 case BITS4(0,0,1,0): // SXTB
17692 assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
17693 nm = "sxtb";
17694 break;
17695 case BITS4(0,1,1,1): // UXTH
17696 assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
17697 nm = "uxth";
17698 break;
17699 case BITS4(0,0,1,1): // SXTH
17700 assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
17701 nm = "sxth";
17702 break;
17703 case BITS4(0,1,0,0): // UXTB16
17704 assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
17705 nm = "uxtb16";
17706 break;
17707 case BITS4(0,0,0,0): { // SXTB16
17708 IRTemp lo32 = newTemp(Ity_I32);
17709 IRTemp hi32 = newTemp(Ity_I32);
17710 assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
17711 assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
17712 assign(
17713 dstT,
17714 binop(Iop_Or32,
17715 binop(Iop_And32,
17716 unop(Iop_8Sto32,
17717 unop(Iop_32to8, mkexpr(lo32))),
17718 mkU32(0xFFFF)),
17719 binop(Iop_Shl32,
17720 unop(Iop_8Sto32,
17721 unop(Iop_32to8, mkexpr(hi32))),
17722 mkU8(16))
17724 nm = "sxtb16";
17725 break;
17727 default:
17728 vassert(0); // guarded by "if" above
17730 putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
17731 DIP("%s%s r%u, r%u, ROR #%d\n", nm, nCC(INSN_COND), rD, rM, rot);
17732 goto decode_success;
17734 /* fall through */
17737 /* ------------------- bfi, bfc ------------------- */
17738 if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
17739 && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
17740 UInt rD = INSN(15,12);
17741 UInt rN = INSN(3,0);
17742 UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
17743 UInt lsb = (insn >> 7) & 0x1F; /* 11:7 */
17744 if (rD == 15 || msb < lsb) {
17745 /* undecodable; fall through */
17746 } else {
17747 IRTemp src = newTemp(Ity_I32);
17748 IRTemp olddst = newTemp(Ity_I32);
17749 IRTemp newdst = newTemp(Ity_I32);
17750 UInt mask = ((UInt)1) << (msb - lsb);
17751 mask = (mask - 1) + mask;
17752 vassert(mask != 0); // guaranteed by "msb < lsb" check above
17753 mask <<= lsb;
17755 assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
17756 assign(olddst, getIRegA(rD));
17757 assign(newdst,
17758 binop(Iop_Or32,
17759 binop(Iop_And32,
17760 binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
17761 mkU32(mask)),
17762 binop(Iop_And32,
17763 mkexpr(olddst),
17764 mkU32(~mask)))
17767 putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
17769 if (rN == 15) {
17770 DIP("bfc%s r%u, #%u, #%u\n",
17771 nCC(INSN_COND), rD, lsb, msb-lsb+1);
17772 } else {
17773 DIP("bfi%s r%u, r%u, #%u, #%u\n",
17774 nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
17776 goto decode_success;
17778 /* fall through */
17781 /* ------------------- {u,s}bfx ------------------- */
17782 if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17783 && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
17784 UInt rD = INSN(15,12);
17785 UInt rN = INSN(3,0);
17786 UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
17787 UInt lsb = (insn >> 7) & 0x1F; /* 11:7 */
17788 UInt msb = lsb + wm1;
17789 UInt isU = (insn >> 22) & 1; /* 22:22 */
17790 if (rD == 15 || rN == 15 || msb >= 32) {
17791 /* undecodable; fall through */
17792 } else {
17793 IRTemp src = newTemp(Ity_I32);
17794 IRTemp tmp = newTemp(Ity_I32);
17795 IRTemp res = newTemp(Ity_I32);
17796 UInt mask = ((1 << wm1) - 1) + (1 << wm1);
17797 vassert(msb >= 0 && msb <= 31);
17798 vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
17800 assign(src, getIRegA(rN));
17801 assign(tmp, binop(Iop_And32,
17802 binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
17803 mkU32(mask)));
17804 assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
17805 binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
17806 mkU8(31-wm1)));
17808 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17810 DIP("%s%s r%u, r%u, #%u, #%u\n",
17811 isU ? "ubfx" : "sbfx",
17812 nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
17813 goto decode_success;
17815 /* fall through */
17818 /* --------------------- Load/store doubleword ------------- */
17819 // LDRD STRD
17820 /* 31 27 23 19 15 11 7 3 # highest bit
17821 28 24 20 16 12 8 4 0
17822 A5-36 1 | 16 cond 0001 U100 Rn Rd im4h 11S1 im4l
17823 A5-38 1 | 32 cond 0001 U000 Rn Rd 0000 11S1 Rm
17824 A5-40 2 | 16 cond 0001 U110 Rn Rd im4h 11S1 im4l
17825 A5-42 2 | 32 cond 0001 U010 Rn Rd 0000 11S1 Rm
17826 A5-44 3 | 16 cond 0000 U100 Rn Rd im4h 11S1 im4l
17827 A5-46 3 | 32 cond 0000 U000 Rn Rd 0000 11S1 Rm
17829 /* case coding:
17830 1 at-ea (access at ea)
17831 2 at-ea-then-upd (access at ea, then Rn = ea)
17832 3 at-Rn-then-upd (access at Rn, then Rn = ea)
17833 ea coding
17834 16 Rn +/- imm8
17835 32 Rn +/- Rm
17837 /* Quickly skip over all of this for hopefully most instructions */
17838 if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
17839 goto after_load_store_doubleword;
17841 /* Check the "11S1" thing. */
17842 if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
17843 goto after_load_store_doubleword;
17845 summary = 0;
17847 /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
17848 summary = 1 | 16;
17850 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
17851 summary = 1 | 32;
17853 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
17854 summary = 2 | 16;
17856 else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
17857 summary = 2 | 32;
17859 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
17860 summary = 3 | 16;
17862 else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
17863 summary = 3 | 32;
17865 else goto after_load_store_doubleword;
17867 { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
17868 UInt rD = (insn >> 12) & 0xF; /* 15:12 */
17869 UInt rM = (insn >> 0) & 0xF; /* 3:0 */
17870 UInt bU = (insn >> 23) & 1; /* 23 U=1 offset+, U=0 offset- */
17871 UInt bS = (insn >> 5) & 1; /* S=1 store, S=0 load */
17872 UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
17874 /* Require rD to be an even numbered register */
17875 if ((rD & 1) != 0)
17876 goto after_load_store_doubleword;
17878 /* Require 11:8 == 0 for Rn +/- Rm cases */
17879 if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
17880 goto after_load_store_doubleword;
17882 /* Skip some invalid cases, which would lead to two competing
17883 updates to the same register, or which are otherwise
17884 disallowed by the spec. */
17885 switch (summary) {
17886 case 1 | 16:
17887 break;
17888 case 1 | 32:
17889 if (rM == 15) goto after_load_store_doubleword;
17890 break;
17891 case 2 | 16: case 3 | 16:
17892 if (rN == 15) goto after_load_store_doubleword;
17893 if (bS == 0 && (rN == rD || rN == rD+1))
17894 goto after_load_store_doubleword;
17895 break;
17896 case 2 | 32: case 3 | 32:
17897 if (rM == 15) goto after_load_store_doubleword;
17898 if (rN == 15) goto after_load_store_doubleword;
17899 if (rN == rM) goto after_load_store_doubleword;
17900 if (bS == 0 && (rN == rD || rN == rD+1))
17901 goto after_load_store_doubleword;
17902 break;
17903 default:
17904 vassert(0);
17907 /* If this is a branch, make it unconditional at this point.
17908 Doing conditional branches in-line is too complex (for
17909 now). */
17910 vassert((rD & 1) == 0); /* from tests above */
17911 if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
17912 // go uncond
17913 mk_skip_over_A32_if_cond_is_false( condT );
17914 condT = IRTemp_INVALID;
17915 // now uncond
17918 /* compute the effective address. Bind it to a tmp since we
17919 may need to use it twice. */
17920 IRExpr* eaE = NULL;
17921 switch (summary & 0xF0) {
17922 case 16:
17923 eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
17924 break;
17925 case 32:
17926 eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
17927 break;
17929 vassert(eaE);
17930 IRTemp eaT = newTemp(Ity_I32);
17931 assign(eaT, eaE);
17933 /* get the old Rn value */
17934 IRTemp rnT = newTemp(Ity_I32);
17935 assign(rnT, getIRegA(rN));
17937 /* decide on the transfer address */
17938 IRTemp taT = IRTemp_INVALID;
17939 switch (summary & 0x0F) {
17940 case 1: case 2: taT = eaT; break;
17941 case 3: taT = rnT; break;
17943 vassert(taT != IRTemp_INVALID);
17945 /* XXX deal with alignment constraints */
17946 /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
17947 ignore alignment issues for the time being. */
17949 /* For almost all cases, we do the writeback after the transfers.
17950 However, that leaves the stack "uncovered" in cases like:
17951 strd rD, [sp, #-8]
17952 strd rD, [sp, #-16]
17953 In which case, do the writeback to SP now, instead of later.
17954 This is bad in that it makes the insn non-restartable if the
17955 accesses fault, but at least keeps Memcheck happy. */
17956 Bool writeback_already_done = False;
17957 if (bS == 1 /*store*/ && summary == (2 | 16)
17958 && rN == 13 && rN != rD && rN != rD+1
17959 && bU == 0/*minus*/
17960 && (imm8 == 8 || imm8 == 16)) {
17961 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
17962 writeback_already_done = True;
17965 /* doubleword store S 1
17966 doubleword load S 0
17968 const HChar* name = NULL;
17969 /* generate the transfers */
17970 if (bS == 1) { // doubleword store
17971 storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
17972 getIRegA(rD+0), condT );
17973 storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
17974 getIRegA(rD+1), condT );
17975 name = "strd";
17976 } else { // doubleword load
17977 IRTemp oldRd0 = newTemp(Ity_I32);
17978 IRTemp oldRd1 = newTemp(Ity_I32);
17979 assign(oldRd0, llGetIReg(rD+0));
17980 assign(oldRd1, llGetIReg(rD+1));
17981 IRTemp newRd0 = newTemp(Ity_I32);
17982 IRTemp newRd1 = newTemp(Ity_I32);
17983 loadGuardedLE( newRd0, ILGop_Ident32,
17984 binop(Iop_Add32, mkexpr(taT), mkU32(0)),
17985 mkexpr(oldRd0), condT );
17986 putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
17987 loadGuardedLE( newRd1, ILGop_Ident32,
17988 binop(Iop_Add32, mkexpr(taT), mkU32(4)),
17989 mkexpr(oldRd1), condT );
17990 putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
17991 name = "ldrd";
17994 /* Update Rn if necessary. */
17995 switch (summary & 0x0F) {
17996 case 2: case 3:
17997 // should be assured by logic above:
17998 vassert(rN != 15); /* from checks above */
17999 if (bS == 0) {
18000 vassert(rD+0 != rN); /* since we just wrote rD+0 */
18001 vassert(rD+1 != rN); /* since we just wrote rD+1 */
18003 if (!writeback_already_done)
18004 putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
18005 break;
18008 switch (summary & 0x0F) {
18009 case 1: DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
18010 break;
18011 case 2: DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
18012 name, nCC(INSN_COND), rD, dis_buf);
18013 break;
18014 case 3: DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
18015 name, nCC(INSN_COND), rD, dis_buf);
18016 break;
18017 default: vassert(0);
18020 goto decode_success;
18023 after_load_store_doubleword:
18025 /* ------------------- {s,u}xtab ------------- */
18026 if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
18027 && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
18028 && BITS4(0,1,1,1) == INSN(7,4)) {
18029 UInt rN = INSN(19,16);
18030 UInt rD = INSN(15,12);
18031 UInt rM = INSN(3,0);
18032 UInt rot = (insn >> 10) & 3;
18033 UInt isU = INSN(22,22);
18034 if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
18035 /* undecodable; fall through */
18036 } else {
18037 IRTemp srcL = newTemp(Ity_I32);
18038 IRTemp srcR = newTemp(Ity_I32);
18039 IRTemp res = newTemp(Ity_I32);
18040 assign(srcR, getIRegA(rM));
18041 assign(srcL, getIRegA(rN));
18042 assign(res, binop(Iop_Add32,
18043 mkexpr(srcL),
18044 unop(isU ? Iop_8Uto32 : Iop_8Sto32,
18045 unop(Iop_32to8,
18046 genROR32(srcR, 8 * rot)))));
18047 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18048 DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
18049 isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
18050 goto decode_success;
18052 /* fall through */
18055 /* ------------------- {s,u}xtah ------------- */
18056 if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
18057 && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
18058 && BITS4(0,1,1,1) == INSN(7,4)) {
18059 UInt rN = INSN(19,16);
18060 UInt rD = INSN(15,12);
18061 UInt rM = INSN(3,0);
18062 UInt rot = (insn >> 10) & 3;
18063 UInt isU = INSN(22,22);
18064 if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
18065 /* undecodable; fall through */
18066 } else {
18067 IRTemp srcL = newTemp(Ity_I32);
18068 IRTemp srcR = newTemp(Ity_I32);
18069 IRTemp res = newTemp(Ity_I32);
18070 assign(srcR, getIRegA(rM));
18071 assign(srcL, getIRegA(rN));
18072 assign(res, binop(Iop_Add32,
18073 mkexpr(srcL),
18074 unop(isU ? Iop_16Uto32 : Iop_16Sto32,
18075 unop(Iop_32to16,
18076 genROR32(srcR, 8 * rot)))));
18077 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18079 DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
18080 isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
18081 goto decode_success;
18083 /* fall through */
18086 /* ------------------- rev16, rev ------------------ */
18087 if (INSN(27,16) == 0x6BF
18088 && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
18089 Bool isREV = INSN(11,4) == 0xF3;
18090 UInt rM = INSN(3,0);
18091 UInt rD = INSN(15,12);
18092 if (rM != 15 && rD != 15) {
18093 IRTemp rMt = newTemp(Ity_I32);
18094 assign(rMt, getIRegA(rM));
18095 IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
18096 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18097 DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
18098 nCC(INSN_COND), rD, rM);
18099 goto decode_success;
18103 /* ------------------- revsh ----------------------- */
18104 if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
18105 UInt rM = INSN(3,0);
18106 UInt rD = INSN(15,12);
18107 if (rM != 15 && rD != 15) {
18108 IRTemp irt_rM = newTemp(Ity_I32);
18109 IRTemp irt_hi = newTemp(Ity_I32);
18110 IRTemp irt_low = newTemp(Ity_I32);
18111 IRTemp irt_res = newTemp(Ity_I32);
18112 assign(irt_rM, getIRegA(rM));
18113 assign(irt_hi,
18114 binop(Iop_Sar32,
18115 binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
18116 mkU8(16)
18119 assign(irt_low,
18120 binop(Iop_And32,
18121 binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
18122 mkU32(0xFF)
18125 assign(irt_res,
18126 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
18128 putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
18129 DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
18130 goto decode_success;
18134 /* ------------------- rbit ------------------ */
18135 if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
18136 UInt rD = INSN(15,12);
18137 UInt rM = INSN(3,0);
18138 if (rD != 15 && rM != 15) {
18139 IRTemp arg = newTemp(Ity_I32);
18140 assign(arg, getIRegA(rM));
18141 IRTemp res = gen_BITREV(arg);
18142 putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18143 DIP("rbit r%u, r%u\n", rD, rM);
18144 goto decode_success;
18148 /* ------------------- smmul ------------------ */
18149 if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
18150 && INSN(15,12) == BITS4(1,1,1,1)
18151 && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
18152 UInt bitR = INSN(5,5);
18153 UInt rD = INSN(19,16);
18154 UInt rM = INSN(11,8);
18155 UInt rN = INSN(3,0);
18156 if (rD != 15 && rM != 15 && rN != 15) {
18157 IRExpr* res
18158 = unop(Iop_64HIto32,
18159 binop(Iop_Add64,
18160 binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
18161 mkU64(bitR ? 0x80000000ULL : 0ULL)));
18162 putIRegA(rD, res, condT, Ijk_Boring);
18163 DIP("smmul%s%s r%u, r%u, r%u\n",
18164 nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
18165 goto decode_success;
18169 /* ------------------- smmla ------------------ */
18170 if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
18171 && INSN(15,12) != BITS4(1,1,1,1)
18172 && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
18173 UInt bitR = INSN(5,5);
18174 UInt rD = INSN(19,16);
18175 UInt rA = INSN(15,12);
18176 UInt rM = INSN(11,8);
18177 UInt rN = INSN(3,0);
18178 if (rD != 15 && rM != 15 && rN != 15) {
18179 IRExpr* res
18180 = unop(Iop_64HIto32,
18181 binop(Iop_Add64,
18182 binop(Iop_Add64,
18183 binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
18184 binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
18185 mkU64(bitR ? 0x80000000ULL : 0ULL)));
18186 putIRegA(rD, res, condT, Ijk_Boring);
18187 DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
18188 nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
18189 goto decode_success;
18193 /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
18194 /* Load Register Unprivileged:
18195 ldrt<c> Rt, [Rn] {, #+/-imm12}
18197 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
18198 UInt rT = INSN(15,12);
18199 UInt rN = INSN(19,16);
18200 UInt imm12 = INSN(11,0);
18201 UInt bU = INSN(23,23);
18202 Bool valid = True;
18203 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18204 if (valid) {
18205 IRTemp newRt = newTemp(Ity_I32);
18206 loadGuardedLE( newRt,
18207 ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
18208 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18209 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18210 getIRegA(rN), mkU32(imm12));
18211 putIRegA(rN, erN, condT, Ijk_Boring);
18212 DIP("ldrt%s r%u, [r%u], #%c%u\n",
18213 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18214 goto decode_success;
18218 /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
18219 /* Load Register Unprivileged:
18220 ldrt<c> Rt, [Rn], +/-Rm{, shift}
18222 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
18223 && INSN(4,4) == 0 ) {
18224 UInt rT = INSN(15,12);
18225 UInt rN = INSN(19,16);
18226 UInt rM = INSN(3,0);
18227 UInt imm5 = INSN(11,7);
18228 UInt bU = INSN(23,23);
18229 UInt type = INSN(6,5);
18230 Bool valid = True;
18231 if (rT == 15 || rN == 15 || rN == rT || rM == 15
18232 /* || (ArchVersion() < 6 && rM == rN) */)
18233 valid = False;
18234 if (valid) {
18235 IRTemp newRt = newTemp(Ity_I32);
18236 loadGuardedLE( newRt,
18237 ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
18238 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18239 // dis_buf generated is slightly bogus, in fact.
18240 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18241 type, imm5, dis_buf);
18242 putIRegA(rN, erN, condT, Ijk_Boring);
18243 DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18244 goto decode_success;
18248 /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
18249 /* Load Register Byte Unprivileged:
18250 ldrbt<c> Rt, [Rn], #+/-imm12
18252 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
18253 UInt rT = INSN(15,12);
18254 UInt rN = INSN(19,16);
18255 UInt imm12 = INSN(11,0);
18256 UInt bU = INSN(23,23);
18257 Bool valid = True;
18258 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18259 if (valid) {
18260 IRTemp newRt = newTemp(Ity_I32);
18261 loadGuardedLE( newRt,
18262 ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
18263 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18264 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18265 getIRegA(rN), mkU32(imm12));
18266 putIRegA(rN, erN, condT, Ijk_Boring);
18267 DIP("ldrbt%s r%u, [r%u], #%c%u\n",
18268 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18269 goto decode_success;
18273 /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
18274 /* Load Register Byte Unprivileged:
18275 ldrbt<c> Rt, [Rn], +/-Rm{, shift}
18277 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
18278 && INSN(4,4) == 0 ) {
18279 UInt rT = INSN(15,12);
18280 UInt rN = INSN(19,16);
18281 UInt rM = INSN(3,0);
18282 UInt imm5 = INSN(11,7);
18283 UInt bU = INSN(23,23);
18284 UInt type = INSN(6,5);
18285 Bool valid = True;
18286 if (rT == 15 || rN == 15 || rN == rT || rM == 15
18287 /* || (ArchVersion() < 6 && rM == rN) */)
18288 valid = False;
18289 if (valid) {
18290 IRTemp newRt = newTemp(Ity_I32);
18291 loadGuardedLE( newRt,
18292 ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
18293 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18294 // dis_buf generated is slightly bogus, in fact.
18295 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18296 type, imm5, dis_buf);
18297 putIRegA(rN, erN, condT, Ijk_Boring);
18298 DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18299 goto decode_success;
18303 /* -------------- (A1) LDRHT reg+#imm8 -------------- */
18304 /* Load Register Halfword Unprivileged:
18305 ldrht<c> Rt, [Rn] {, #+/-imm8}
18307 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18308 && INSN(7,4) == BITS4(1,0,1,1) ) {
18309 UInt rT = INSN(15,12);
18310 UInt rN = INSN(19,16);
18311 UInt bU = INSN(23,23);
18312 UInt imm4H = INSN(11,8);
18313 UInt imm4L = INSN(3,0);
18314 UInt imm8 = (imm4H << 4) | imm4L;
18315 Bool valid = True;
18316 if (rT == 15 || rN == 15 || rN == rT)
18317 valid = False;
18318 if (valid) {
18319 IRTemp newRt = newTemp(Ity_I32);
18320 loadGuardedLE( newRt,
18321 ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
18322 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18323 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18324 getIRegA(rN), mkU32(imm8));
18325 putIRegA(rN, erN, condT, Ijk_Boring);
18326 DIP("ldrht%s r%u, [r%u], #%c%u\n",
18327 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18328 goto decode_success;
18332 /* -------------- (A2) LDRHT reg+/-reg -------------- */
18333 /* Load Register Halfword Unprivileged:
18334 ldrht<c> Rt, [Rn], +/-Rm
18336 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18337 && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
18338 UInt rT = INSN(15,12);
18339 UInt rN = INSN(19,16);
18340 UInt rM = INSN(3,0);
18341 UInt bU = INSN(23,23);
18342 Bool valid = True;
18343 if (rT == 15 || rN == 15 || rN == rT || rM == 15)
18344 valid = False;
18345 if (valid) {
18346 IRTemp newRt = newTemp(Ity_I32);
18347 loadGuardedLE( newRt,
18348 ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
18349 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18350 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18351 getIRegA(rN), getIRegA(rM));
18352 putIRegA(rN, erN, condT, Ijk_Boring);
18353 DIP("ldrht%s r%u, [r%u], %cr%u\n",
18354 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18355 goto decode_success;
18359 /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
18360 /* Load Register Signed Halfword Unprivileged:
18361 ldrsht<c> Rt, [Rn] {, #+/-imm8}
18363 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18364 && INSN(7,4) == BITS4(1,1,1,1)) {
18365 UInt rT = INSN(15,12);
18366 UInt rN = INSN(19,16);
18367 UInt bU = INSN(23,23);
18368 UInt imm4H = INSN(11,8);
18369 UInt imm4L = INSN(3,0);
18370 UInt imm8 = (imm4H << 4) | imm4L;
18371 Bool valid = True;
18372 if (rN == 15 || rT == 15 || rN == rT)
18373 valid = False;
18374 if (valid) {
18375 IRTemp newRt = newTemp(Ity_I32);
18376 loadGuardedLE( newRt,
18377 ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
18378 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18379 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18380 getIRegA(rN), mkU32(imm8));
18381 putIRegA(rN, erN, condT, Ijk_Boring);
18382 DIP("ldrsht%s r%u, [r%u], #%c%u\n",
18383 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18384 goto decode_success;
18388 /* -------------- (A2) LDRSHT reg+/-reg -------------- */
18389 /* Load Register Signed Halfword Unprivileged:
18390 ldrsht<c> Rt, [Rn], +/-Rm
18392 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18393 && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
18394 UInt rT = INSN(15,12);
18395 UInt rN = INSN(19,16);
18396 UInt rM = INSN(3,0);
18397 UInt bU = INSN(23,23);
18398 Bool valid = True;
18399 if (rN == 15 || rT == 15 || rN == rT || rM == 15)
18400 valid = False;
18401 if (valid) {
18402 IRTemp newRt = newTemp(Ity_I32);
18403 loadGuardedLE( newRt,
18404 ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
18405 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18406 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18407 getIRegA(rN), getIRegA(rM));
18408 putIRegA(rN, erN, condT, Ijk_Boring);
18409 DIP("ldrsht%s r%u, [r%u], %cr%u\n",
18410 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18411 goto decode_success;
18415 /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
18416 /* Load Register Signed Byte Unprivileged:
18417 ldrsbt<c> Rt, [Rn] {, #+/-imm8}
18419 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18420 && INSN(7,4) == BITS4(1,1,0,1)) {
18421 UInt rT = INSN(15,12);
18422 UInt rN = INSN(19,16);
18423 UInt bU = INSN(23,23);
18424 UInt imm4H = INSN(11,8);
18425 UInt imm4L = INSN(3,0);
18426 UInt imm8 = (imm4H << 4) | imm4L;
18427 Bool valid = True;
18428 if (rT == 15 || rN == 15 || rN == rT)
18429 valid = False;
18430 if (valid) {
18431 IRTemp newRt = newTemp(Ity_I32);
18432 loadGuardedLE( newRt,
18433 ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
18434 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18435 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18436 getIRegA(rN), mkU32(imm8));
18437 putIRegA(rN, erN, condT, Ijk_Boring);
18438 DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
18439 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18440 goto decode_success;
18444 /* -------------- (A2) LDRSBT reg+/-reg -------------- */
18445 /* Load Register Signed Byte Unprivileged:
18446 ldrsbt<c> Rt, [Rn], +/-Rm
18448 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18449 && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
18450 UInt rT = INSN(15,12);
18451 UInt rN = INSN(19,16);
18452 UInt bU = INSN(23,23);
18453 UInt rM = INSN(3,0);
18454 Bool valid = True;
18455 if (rT == 15 || rN == 15 || rN == rT || rM == 15)
18456 valid = False;
18457 if (valid) {
18458 IRTemp newRt = newTemp(Ity_I32);
18459 loadGuardedLE( newRt,
18460 ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
18461 putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18462 IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18463 getIRegA(rN), getIRegA(rM));
18464 putIRegA(rN, erN, condT, Ijk_Boring);
18465 DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
18466 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18467 goto decode_success;
18471 /* -------------- (A1) STRBT reg+#imm12 -------------- */
18472 /* Store Register Byte Unprivileged:
18473 strbt<c> Rt, [Rn], #+/-imm12
18475 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
18476 UInt rT = INSN(15,12);
18477 UInt rN = INSN(19,16);
18478 UInt imm12 = INSN(11,0);
18479 UInt bU = INSN(23,23);
18480 Bool valid = True;
18481 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18482 if (valid) {
18483 IRExpr* address = getIRegA(rN);
18484 IRExpr* data = unop(Iop_32to8, getIRegA(rT));
18485 storeGuardedLE( address, data, condT);
18486 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18487 getIRegA(rN), mkU32(imm12));
18488 putIRegA(rN, newRn, condT, Ijk_Boring);
18489 DIP("strbt%s r%u, [r%u], #%c%u\n",
18490 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18491 goto decode_success;
18495 /* -------------- (A2) STRBT reg+/-reg -------------- */
18496 /* Store Register Byte Unprivileged:
18497 strbt<c> Rt, [Rn], +/-Rm{, shift}
18499 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
18500 && INSN(4,4) == 0) {
18501 UInt rT = INSN(15,12);
18502 UInt rN = INSN(19,16);
18503 UInt imm5 = INSN(11,7);
18504 UInt type = INSN(6,5);
18505 UInt rM = INSN(3,0);
18506 UInt bU = INSN(23,23);
18507 Bool valid = True;
18508 if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
18509 if (valid) {
18510 IRExpr* address = getIRegA(rN);
18511 IRExpr* data = unop(Iop_32to8, getIRegA(rT));
18512 storeGuardedLE( address, data, condT);
18513 // dis_buf generated is slightly bogus, in fact.
18514 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18515 type, imm5, dis_buf);
18516 putIRegA(rN, erN, condT, Ijk_Boring);
18517 DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18518 goto decode_success;
18522 /* -------------- (A1) STRHT reg+#imm8 -------------- */
18523 /* Store Register Halfword Unprivileged:
18524 strht<c> Rt, [Rn], #+/-imm8
18526 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
18527 && INSN(7,4) == BITS4(1,0,1,1) ) {
18528 UInt rT = INSN(15,12);
18529 UInt rN = INSN(19,16);
18530 UInt imm4H = INSN(11,8);
18531 UInt imm4L = INSN(3,0);
18532 UInt imm8 = (imm4H << 4) | imm4L;
18533 UInt bU = INSN(23,23);
18534 Bool valid = True;
18535 if (rT == 15 || rN == 15 || rN == rT) valid = False;
18536 if (valid) {
18537 IRExpr* address = getIRegA(rN);
18538 IRExpr* data = unop(Iop_32to16, getIRegA(rT));
18539 storeGuardedLE( address, data, condT);
18540 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18541 getIRegA(rN), mkU32(imm8));
18542 putIRegA(rN, newRn, condT, Ijk_Boring);
18543 DIP("strht%s r%u, [r%u], #%c%u\n",
18544 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18545 goto decode_success;
18549 /* -------------- (A2) STRHT reg+reg -------------- */
18550 /* Store Register Halfword Unprivileged:
18551 strht<c> Rt, [Rn], +/-Rm
18553 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
18554 && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
18555 UInt rT = INSN(15,12);
18556 UInt rN = INSN(19,16);
18557 UInt rM = INSN(3,0);
18558 UInt bU = INSN(23,23);
18559 Bool valid = True;
18560 if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
18561 if (valid) {
18562 IRExpr* address = getIRegA(rN);
18563 IRExpr* data = unop(Iop_32to16, getIRegA(rT));
18564 storeGuardedLE( address, data, condT);
18565 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18566 getIRegA(rN), getIRegA(rM));
18567 putIRegA(rN, newRn, condT, Ijk_Boring);
18568 DIP("strht%s r%u, [r%u], %cr%u\n",
18569 nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18570 goto decode_success;
18574 /* -------------- (A1) STRT reg+imm12 -------------- */
18575 /* Store Register Unprivileged:
18576 strt<c> Rt, [Rn], #+/-imm12
18578 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
18579 UInt rT = INSN(15,12);
18580 UInt rN = INSN(19,16);
18581 UInt imm12 = INSN(11,0);
18582 UInt bU = INSN(23,23);
18583 Bool valid = True;
18584 if (rN == 15 || rN == rT) valid = False;
18585 if (valid) {
18586 IRExpr* address = getIRegA(rN);
18587 storeGuardedLE( address, getIRegA(rT), condT);
18588 IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18589 getIRegA(rN), mkU32(imm12));
18590 putIRegA(rN, newRn, condT, Ijk_Boring);
18591 DIP("strt%s r%u, [r%u], %c%u\n",
18592 nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18593 goto decode_success;
18597 /* -------------- (A2) STRT reg+reg -------------- */
18598 /* Store Register Unprivileged:
18599 strt<c> Rt, [Rn], +/-Rm{, shift}
18601 if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
18602 && INSN(4,4) == 0 ) {
18603 UInt rT = INSN(15,12);
18604 UInt rN = INSN(19,16);
18605 UInt rM = INSN(3,0);
18606 UInt type = INSN(6,5);
18607 UInt imm5 = INSN(11,7);
18608 UInt bU = INSN(23,23);
18609 Bool valid = True;
18610 if (rN == 15 || rN == rT || rM == 15) valid = False;
18611 /* FIXME We didn't do:
18612 if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
18613 if (valid) {
18614 storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
18615 // dis_buf generated is slightly bogus, in fact.
18616 IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18617 type, imm5, dis_buf);
18618 putIRegA(rN, erN, condT, Ijk_Boring);
18619 DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18620 goto decode_success;
18624 /* ----------------------------------------------------------- */
18625 /* -- ARMv7 instructions -- */
18626 /* ----------------------------------------------------------- */
18628 /* -------------- read CP15 TPIDRURO register ------------- */
18629 /* mrc p15, 0, r0, c13, c0, 3 up to
18630 mrc p15, 0, r14, c13, c0, 3
18632 /* I don't know whether this is really v7-only. But anyway, we
18633 have to support it since arm-linux uses TPIDRURO as a thread
18634 state register. */
18635 if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
18636 UInt rD = INSN(15,12);
18637 if (rD <= 14) {
18638 /* skip r15, that's too stupid to handle */
18639 putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
18640 condT, Ijk_Boring);
18641 DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
18642 goto decode_success;
18644 /* fall through */
18647 /* ------------ read/write CP15 TPIDRURW register ----------- */
18648 /* mcr p15, 0, r0, c13, c0, 2 (r->cr xfer) up to
18649 mcr p15, 0, r14, c13, c0, 2
18651 mrc p15, 0, r0, c13, c0, 2 (rc->r xfer) up to
18652 mrc p15, 0, r14, c13, c0, 2
18654 if (0x0E0D0F50 == (insn & 0x0FFF0FFF)) { // MCR
18655 UInt rS = INSN(15,12);
18656 if (rS <= 14) {
18657 /* skip r15, that's too stupid to handle */
18658 putMiscReg32(OFFB_TPIDRURW, getIRegA(rS), condT);
18659 DIP("mcr%s p15,0, r%u, c13, c0, 2\n", nCC(INSN_COND), rS);
18660 goto decode_success;
18662 /* fall through */
18664 if (0x0E1D0F50 == (insn & 0x0FFF0FFF)) { // MRC
18665 UInt rD = INSN(15,12);
18666 if (rD <= 14) {
18667 /* skip r15, that's too stupid to handle */
18668 putIRegA(rD, IRExpr_Get(OFFB_TPIDRURW, Ity_I32),
18669 condT, Ijk_Boring);
18670 DIP("mrc%s p15,0, r%u, c13, c0, 2\n", nCC(INSN_COND), rD);
18671 goto decode_success;
18673 /* fall through */
18676 /* -------------- read CP15 PMUSRENR register ------------- */
18677 /* mrc p15, 0, r0, c9, c14, 0 up to
18678 mrc p15, 0, r14, c9, c14, 0
 18680 /* A program reading this register is really asking "which
 18681    performance monitoring registers are available in user space?"
 18682    The simple answer here is to return zero, meaning "none". See
 18683    #345984. */
18684 if (0x0E190F1E == (insn & 0x0FFF0FFF)) {
18685 UInt rD = INSN(15,12);
18686 if (rD <= 14) {
18687 /* skip r15, that's too stupid to handle */
18688 putIRegA(rD, mkU32(0), condT, Ijk_Boring);
18689 DIP("mrc%s p15,0, r%u, c9, c14, 0\n", nCC(INSN_COND), rD);
18690 goto decode_success;
18692 /* fall through */
18695 /* Handle various kinds of barriers. This is rather indiscriminate
18696 in the sense that they are all turned into an IR Fence, which
18697 means we don't know which they are, so the back end has to
 18698 re-emit them all when it comes across an IR Fence.
18700 /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
18701 if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
18702 UInt rT = INSN(15,12);
18703 if (rT <= 14) {
18704 /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7). Data
18705 Memory Barrier -- ensures ordering of memory accesses. */
18706 stmt( IRStmt_MBE(Imbe_Fence) );
18707 DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
18708 goto decode_success;
18710 /* fall through */
18712 /* other flavours of barrier */
18713 switch (insn) {
18714 case 0xEE070F9A: /* v6 */
18715 /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7). Data
18716 Synch Barrier -- ensures completion of memory accesses. */
18717 stmt( IRStmt_MBE(Imbe_Fence) );
18718 DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
18719 goto decode_success;
18720 case 0xEE070F95: /* v6 */
18721 /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
18722 Instruction Synchronisation Barrier (or Flush Prefetch
18723 Buffer) -- a pipe flush, I think. I suspect we could
18724 ignore those, but to be on the safe side emit a fence
18725 anyway. */
18726 stmt( IRStmt_MBE(Imbe_Fence) );
18727 DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
18728 goto decode_success;
18729 default:
18730 break;
18733 /* ----------------------------------------------------------- */
18734 /* -- Hints -- */
18735 /* ----------------------------------------------------------- */
18737 switch (insn & 0x0FFFFFFF) {
18738 /* ------------------- NOP ------------------ */
18739 case 0x0320F000:
18740 DIP("nop%s\n", nCC(INSN_COND));
18741 goto decode_success;
18742 /* ------------------- YIELD ------------------ */
18743 case 0x0320F001:
18744 /* Continue after conditionally yielding. */
18745 DIP("yield%s\n", nCC(INSN_COND));
18746 stmt( IRStmt_Exit( unop(Iop_32to1,
18747 condT == IRTemp_INVALID
18748 ? mkU32(1) : mkexpr(condT)),
18749 Ijk_Yield,
18750 IRConst_U32(guest_R15_curr_instr_notENC + 4),
18751 OFFB_R15T ));
18752 goto decode_success;
18753 default:
18754 break;
18757 /* ----------------------------------------------------------- */
18758 /* -- VFP (CP 10, CP 11) instructions (in ARM mode) -- */
18759 /* ----------------------------------------------------------- */
18761 if (INSN_COND != ARMCondNV) {
18762 Bool ok_vfp = decode_CP10_CP11_instruction (
18763 &dres, INSN(27,0), condT, INSN_COND,
18764 False/*!isT*/
18766 if (ok_vfp)
18767 goto decode_success;
18770 /* ----------------------------------------------------------- */
18771 /* -- NEON instructions (in ARM mode) -- */
18772 /* ----------------------------------------------------------- */
18774 /* These are all in NV space, and so are taken care of (far) above,
18775 by a call from this function to
18776 decode_NV_instruction_ARMv7_and_below(). */
18778 /* ----------------------------------------------------------- */
18779 /* -- v6 media instructions (in ARM mode) -- */
18780 /* ----------------------------------------------------------- */
18782 { Bool ok_v6m = decode_V6MEDIA_instruction(
18783 &dres, INSN(27,0), condT, INSN_COND,
18784 False/*!isT*/
18786 if (ok_v6m)
18787 goto decode_success;
18790 /* ----------------------------------------------------------- */
18791 /* -- v8 instructions (in ARM mode) -- */
18792 /* ----------------------------------------------------------- */
18794 after_v7_decoder:
18796 /* If we get here, it means that all attempts to decode the
18797 instruction as ARMv7 or earlier have failed. So, if we're doing
18798 ARMv8 or later, here is the point to try for it. */
18800 if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
18801 Bool ok_v8
18802 = decode_V8_instruction( &dres, insn, condT, False/*!isT*/,
18803 IRTemp_INVALID, IRTemp_INVALID );
18804 if (ok_v8)
18805 goto decode_success;
18808 /* ----------------------------------------------------------- */
18809 /* -- Undecodable -- */
18810 /* ----------------------------------------------------------- */
18812 goto decode_failure;
18813 /*NOTREACHED*/
18815 decode_failure:
18816 /* All decode failures end up here. */
18817 if (sigill_diag) {
18818 vex_printf("disInstr(arm): unhandled instruction: "
18819 "0x%x\n", insn);
18820 vex_printf(" cond=%d(0x%x) 27:20=%d(0x%02x) "
18821 "4:4=%d "
18822 "3:0=%d(0x%x)\n",
18823 (Int)INSN_COND, (UInt)INSN_COND,
18824 (Int)INSN(27,20), (UInt)INSN(27,20),
18825 (Int)INSN(4,4),
18826 (Int)INSN(3,0), (UInt)INSN(3,0) );
18829 /* Tell the dispatcher that this insn cannot be decoded, and so has
18830 not been executed, and (is currently) the next to be executed.
 18831 R15 should be up-to-date since it was made so at the start of each
18832 insn, but nevertheless be paranoid and update it again right
18833 now. */
18834 vassert(0 == (guest_R15_curr_instr_notENC & 3));
18835 llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
18836 dres.len = 0;
18837 dres.whatNext = Dis_StopHere;
18838 dres.jk_StopHere = Ijk_NoDecode;
18839 return dres;
18841 decode_success:
18842 /* All decode successes end up here. */
18843 DIP("\n");
18845 vassert(dres.len == 4 || dres.len == 20);
18847 /* Now then. Do we have an implicit jump to r15 to deal with? */
18848 if (r15written) {
 18849 /* If we get a jump to deal with, we assume that there's been no
18850 other competing branch stuff previously generated for this
18851 insn. That's reasonable, in the sense that the ARM insn set
18852 appears to declare as "Unpredictable" any instruction which
18853 generates more than one possible new value for r15. Hence
18854 just assert. The decoders themselves should check against
18855 all such instructions which are thusly Unpredictable, and
18856 decline to decode them. Hence we should never get here if we
18857 have competing new values for r15, and hence it is safe to
18858 assert here. */
18859 vassert(dres.whatNext == Dis_Continue);
18860 vassert(irsb->next == NULL);
18861 vassert(irsb->jumpkind == Ijk_Boring);
18862 /* If r15 is unconditionally written, terminate the block by
18863 jumping to it. If it's conditionally written, still
18864 terminate the block (a shame, but we can't do side exits to
18865 arbitrary destinations), but first jump to the next
18866 instruction if the condition doesn't hold. */
18867 /* We can't use getIReg(15) to get the destination, since that
18868 will produce r15+8, which isn't what we want. Must use
18869 llGetIReg(15) instead. */
18870 if (r15guard == IRTemp_INVALID) {
18871 /* unconditional */
18872 } else {
18873 /* conditional */
18874 stmt( IRStmt_Exit(
18875 unop(Iop_32to1,
18876 binop(Iop_Xor32,
18877 mkexpr(r15guard), mkU32(1))),
18878 r15kind,
18879 IRConst_U32(guest_R15_curr_instr_notENC + 4),
18880 OFFB_R15T
18883 /* This seems crazy, but we're required to finish the insn with
18884 a write to the guest PC. As usual we rely on ir_opt to tidy
18885 up later. */
18886 llPutIReg(15, llGetIReg(15));
18887 dres.whatNext = Dis_StopHere;
18888 dres.jk_StopHere = r15kind;
18889 } else {
18890 /* Set up the end-state in the normal way. */
18891 switch (dres.whatNext) {
18892 case Dis_Continue:
18893 llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
18894 break;
18895 case Dis_StopHere:
18896 break;
18897 default:
18898 vassert(0);
18902 return dres;
18904 # undef INSN_COND
18905 # undef INSN
18909 /*------------------------------------------------------------*/
18910 /*--- Disassemble a single Thumb2 instruction ---*/
18911 /*------------------------------------------------------------*/
18913 static const UChar it_length_table[256]; /* fwds */
18915 /* NB: in Thumb mode we do fetches of regs with getIRegT, which
18916 automagically adds 4 to fetches of r15. However, writes to regs
18917 are done with putIRegT, which disallows writes to r15. Hence any
18918 r15 writes and associated jumps have to be done "by hand". */
18920 /* Disassemble a single Thumb instruction into IR. The instruction is
18921 located in host memory at guest_instr, and has (decoded) guest IP
18922 of guest_R15_curr_instr_notENC, which will have been set before the
18923 call here. */
18925 static
18926 DisResult disInstr_THUMB_WRK (
18927 const UChar* guest_instr,
18928 const VexArchInfo* archinfo,
18929 const VexAbiInfo* abiinfo,
18930 Bool sigill_diag
18933 /* A macro to fish bits out of insn0. There's also INSN1, to fish
18934 bits out of insn1, but that's defined only after the end of the
18935 16-bit insn decoder, so as to stop it mistakenly being used
18936 therein. */
18937 # define INSN0(_bMax,_bMin) SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
18939 DisResult dres;
18940 UShort insn0; /* first 16 bits of the insn */
18941 UShort insn1; /* second 16 bits of the insn */
18942 HChar dis_buf[128]; // big enough to hold LDMIA etc text
18944 /* Summary result of the ITxxx backwards analysis: False == safe
18945 but suboptimal. */
18946 Bool guaranteedUnconditional = False;
18948 /* Set result defaults. */
18949 dres.whatNext = Dis_Continue;
18950 dres.len = 2;
18951 dres.jk_StopHere = Ijk_INVALID;
18952 dres.hint = Dis_HintNone;
18954 /* Set default actions for post-insn handling of writes to r15, if
18955 required. */
18956 r15written = False;
18957 r15guard = IRTemp_INVALID; /* unconditional */
18958 r15kind = Ijk_Boring;
18960 /* Insns could be 2 or 4 bytes long. Just get the first 16 bits at
18961 this point. If we need the second 16, get them later. We can't
18962 get them both out immediately because it risks a fault (very
18963 unlikely, but ..) if the second 16 bits aren't actually
18964 necessary. */
18965 insn0 = getUShortLittleEndianly( guest_instr );
18966 insn1 = 0; /* We'll get it later, once we know we need it. */
18968 /* Similarly, will set this later. */
18969 IRTemp old_itstate = IRTemp_INVALID;
18971 if (0) vex_printf("insn: 0x%x\n", insn0);
18973 DIP("\t(thumb) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
18975 vassert(0 == (guest_R15_curr_instr_notENC & 1));
18977 /* ----------------------------------------------------------- */
18978 /* Spot "Special" instructions (see comment at top of file). */
18980 const UChar* code = guest_instr;
18981 /* Spot the 16-byte preamble:
18983 ea4f 0cfc mov.w ip, ip, ror #3
18984 ea4f 3c7c mov.w ip, ip, ror #13
18985 ea4f 7c7c mov.w ip, ip, ror #29
18986 ea4f 4cfc mov.w ip, ip, ror #19
18988 UInt word1 = 0x0CFCEA4F;
18989 UInt word2 = 0x3C7CEA4F;
18990 UInt word3 = 0x7C7CEA4F;
18991 UInt word4 = 0x4CFCEA4F;
18992 if (getUIntLittleEndianly(code+ 0) == word1 &&
18993 getUIntLittleEndianly(code+ 4) == word2 &&
18994 getUIntLittleEndianly(code+ 8) == word3 &&
18995 getUIntLittleEndianly(code+12) == word4) {
18996 /* Got a "Special" instruction preamble. Which one is it? */
18997 // 0x 0A 0A EA 4A
18998 if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
18999 /* orr.w r10,r10,r10 */) {
19000 /* R3 = client_request ( R4 ) */
19001 DIP("r3 = client_request ( %%r4 )\n");
19002 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19003 dres.jk_StopHere = Ijk_ClientReq;
19004 dres.whatNext = Dis_StopHere;
19005 goto decode_success;
19007 else
19008 // 0x 0B 0B EA 4B
19009 if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
19010 /* orr r11,r11,r11 */) {
19011 /* R3 = guest_NRADDR */
19012 DIP("r3 = guest_NRADDR\n");
19013 dres.len = 20;
19014 llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
19015 goto decode_success;
19017 else
19018 // 0x 0C 0C EA 4C
19019 if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
19020 /* orr r12,r12,r12 */) {
19021 /* branch-and-link-to-noredir R4 */
19022 DIP("branch-and-link-to-noredir r4\n");
19023 llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19024 llPutIReg(15, getIRegT(4));
19025 dres.jk_StopHere = Ijk_NoRedir;
19026 dres.whatNext = Dis_StopHere;
19027 goto decode_success;
19029 else
19030 // 0x 09 09 EA 49
19031 if (getUIntLittleEndianly(code+16) == 0x0909EA49
19032 /* orr r9,r9,r9 */) {
19033 /* IR injection */
19034 DIP("IR injection\n");
19035 vex_inject_ir(irsb, Iend_LE);
19036 // Invalidate the current insn. The reason is that the IRop we're
19037 // injecting here can change. In which case the translation has to
19038 // be redone. For ease of handling, we simply invalidate all the
19039 // time.
19040 stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
19041 stmt(IRStmt_Put(OFFB_CMLEN, mkU32(20)));
19042 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19043 dres.whatNext = Dis_StopHere;
19044 dres.jk_StopHere = Ijk_InvalICache;
19045 goto decode_success;
19047 /* We don't know what it is. Set insn0 so decode_failure
19048 can print the insn following the Special-insn preamble. */
19049 insn0 = getUShortLittleEndianly(code+16);
19050 goto decode_failure;
19051 /*NOTREACHED*/
19056 /* ----------------------------------------------------------- */
19058 /* Main Thumb instruction decoder starts here. It's a series of
19059 switches which examine ever longer bit sequences at the MSB of
19060 the instruction word, first for 16-bit insns, then for 32-bit
19061 insns. */
19063 /* --- BEGIN ITxxx optimisation analysis --- */
19064 /* This is a crucial optimisation for the ITState boilerplate that
19065 follows. Examine the 9 halfwords preceding this instruction,
19066 and if we are absolutely sure that none of them constitute an
19067 'it' instruction, then we can be sure that this instruction is
19068 not under the control of any 'it' instruction, and so
19069 guest_ITSTATE must be zero. So write zero into ITSTATE right
19070 now, so that iropt can fold out almost all of the resulting
19071 junk.
19073 If we aren't sure, we can always safely skip this step. So be a
19074 bit conservative about it: only poke around in the same page as
19075 this instruction, lest we get a fault from the previous page
19076 that would not otherwise have happened. The saving grace is
19077 that such skipping is pretty rare -- it only happens,
19078 statistically, 18/4096ths of the time, so is judged unlikely to
19079 be a performance problems.
19081 FIXME: do better. Take into account the number of insns covered
19082 by any IT insns we find, to rule out cases where an IT clearly
19083 cannot cover this instruction. This would improve behaviour for
19084 branch targets immediately following an IT-guarded group that is
19085 not of full length. Eg, (and completely ignoring issues of 16-
19086 vs 32-bit insn length):
19088 ite cond
19089 insn1
19090 insn2
19091 label: insn3
19092 insn4
19094 The 'it' only conditionalises insn1 and insn2. However, the
19095 current analysis is conservative and considers insn3 and insn4
19096 also possibly guarded. Hence if 'label:' is the start of a hot
19097 loop we will get a big performance hit.
19100 /* Summary result of this analysis: False == safe but
19101 suboptimal. */
19102 vassert(guaranteedUnconditional == False);
19104 UInt pc = guest_R15_curr_instr_notENC;
19105 vassert(0 == (pc & 1));
19107 UInt pageoff = pc & 0xFFF;
19108 if (pageoff >= 18) {
19109 /* It's safe to poke about in the 9 halfwords preceding this
19110 insn. So, have a look at them. */
19111 guaranteedUnconditional = True; /* assume no 'it' insn found,
19112 till we do */
19113 UShort* hwp = (UShort*)(HWord)pc;
19114 Int i;
19115 for (i = -1; i >= -9; i--) {
19116 /* We're in the same page. (True, but commented out due
19117 to expense.) */
19119 vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
19120 == ( pc & 0xFFFFF000 ) );
19122 /* All valid IT instructions must have the form 0xBFxy,
19123 where x can be anything, but y must be nonzero. Find
19124 the number of insns covered by it (1 .. 4) and check to
19125 see if it can possibly reach up to the instruction in
19126 question. Some (x,y) combinations mean UNPREDICTABLE,
19127 and the table is constructed to be conservative by
19128 returning 4 for those cases, so the analysis is safe
19129 even if the code uses unpredictable IT instructions (in
19130 which case its authors are nuts, but hey.) */
19131 UShort hwp_i = hwp[i];
19132 if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
19133 /* might be an 'it' insn. */
19134 /* # guarded insns */
19135 Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
19136 vassert(n_guarded >= 1 && n_guarded <= 4);
19137 if (n_guarded * 2 /* # guarded HWs, worst case */
19138 > (-(i+1))) /* -(i+1): # remaining HWs after the IT */
19139 /* -(i+0) also seems to work, even though I think
19140 it's wrong. I don't understand that. */
19141 guaranteedUnconditional = False;
19142 break;
19147 /* --- END ITxxx optimisation analysis --- */
19149 /* Generate the guarding condition for this insn, by examining
19150 ITSTATE. Assign it to condT. Also, generate new
19151 values for ITSTATE ready for stuffing back into the
19152 guest state, but don't actually do the Put yet, since it will
19153 need to stuffed back in only after the instruction gets to a
19154 point where it is sure to complete. Mostly we let the code at
19155 decode_success handle this, but in cases where the insn contains
19156 a side exit, we have to update them before the exit. */
19158 /* If the ITxxx optimisation analysis above could not prove that
19159 this instruction is guaranteed unconditional, we insert a
19160 lengthy IR preamble to compute the guarding condition at
19161 runtime. If it can prove it (which obviously we hope is the
19162 normal case) then we insert a minimal preamble, which is
19163 equivalent to setting guest_ITSTATE to zero and then folding
19164 that through the full preamble (which completely disappears). */
19166 IRTemp condT = IRTemp_INVALID;
19167 IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
19169 IRTemp new_itstate = IRTemp_INVALID;
19170 vassert(old_itstate == IRTemp_INVALID);
19172 if (guaranteedUnconditional) {
19173 /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
19175 // ITSTATE = 0 :: I32
19176 IRTemp z32 = newTemp(Ity_I32);
19177 assign(z32, mkU32(0));
19178 put_ITSTATE(z32);
19180 // old_itstate = 0 :: I32
19182 // old_itstate = get_ITSTATE();
19183 old_itstate = z32; /* 0 :: I32 */
19185 // new_itstate = old_itstate >> 8
19186 // = 0 >> 8
19187 // = 0 :: I32
19189 // new_itstate = newTemp(Ity_I32);
19190 // assign(new_itstate,
19191 // binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
19192 new_itstate = z32;
19194 // ITSTATE = 0 :: I32(again)
19196 // put_ITSTATE(new_itstate);
19198 // condT1 = calc_cond_dyn( xor(and(old_istate,0xF0), 0xE0) )
19199 // = calc_cond_dyn( xor(0,0xE0) )
19200 // = calc_cond_dyn ( 0xE0 )
19201 // = 1 :: I32
19202 // Not that this matters, since the computed value is not used:
19203 // see condT folding below
19205 // IRTemp condT1 = newTemp(Ity_I32);
19206 // assign(condT1,
19207 // mk_armg_calculate_condition_dyn(
19208 // binop(Iop_Xor32,
19209 // binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
19210 // mkU32(0xE0))
19211 // )
19212 // );
19214 // condT = 32to8(and32(old_itstate,0xF0)) == 0 ? 1 : condT1
19215 // = 32to8(and32(0,0xF0)) == 0 ? 1 : condT1
19216 // = 32to8(0) == 0 ? 1 : condT1
19217 // = 0 == 0 ? 1 : condT1
19218 // = 1
19220 // condT = newTemp(Ity_I32);
19221 // assign(condT, IRExpr_ITE(
19222 // unop(Iop_32to8, binop(Iop_And32,
19223 // mkexpr(old_itstate),
19224 // mkU32(0xF0))),
19225 // mkexpr(condT1),
19226 // mkU32(1))
19227 // ));
19228 condT = newTemp(Ity_I32);
19229 assign(condT, mkU32(1));
19231 // notInITt = xor32(and32(old_itstate, 1), 1)
19232 // = xor32(and32(0, 1), 1)
19233 // = xor32(0, 1)
19234 // = 1 :: I32
19236 // IRTemp notInITt = newTemp(Ity_I32);
19237 // assign(notInITt,
19238 // binop(Iop_Xor32,
19239 // binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
19240 // mkU32(1)));
19242 // cond_AND_notInIT_T = and32(notInITt, condT)
19243 // = and32(1, 1)
19244 // = 1
19246 // cond_AND_notInIT_T = newTemp(Ity_I32);
19247 // assign(cond_AND_notInIT_T,
19248 // binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
19249 cond_AND_notInIT_T = condT; /* 1 :: I32 */
19251 /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
19252 } else {
19253 /* BEGIN { STANDARD PREAMBLE; } */
19255 old_itstate = get_ITSTATE();
19257 new_itstate = newTemp(Ity_I32);
19258 assign(new_itstate,
19259 binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
19261 put_ITSTATE(new_itstate);
19263 /* Same strategy as for ARM insns: generate a condition
19264 temporary at this point (or IRTemp_INVALID, meaning
19265 unconditional). We leave it to lower-level instruction
19266 decoders to decide whether they can generate straight-line
19267 code, or whether they must generate a side exit before the
19268 instruction. condT :: Ity_I32 and is always either zero or
19269 one. */
19270 IRTemp condT1 = newTemp(Ity_I32);
19271 assign(condT1,
19272 mk_armg_calculate_condition_dyn(
19273 binop(Iop_Xor32,
19274 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
19275 mkU32(0xE0))
19279 /* This is a bit complex, but needed to make Memcheck understand
19280 that, if the condition in old_itstate[7:4] denotes AL (that
19281 is, if this instruction is to be executed unconditionally),
19282 then condT does not depend on the results of calling the
19283 helper.
19285 We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
19286 that case set condT directly to 1. Else we use the results
19287 of the helper. Since old_itstate is always defined and
19288 because Memcheck does lazy V-bit propagation through ITE,
19289 this will cause condT to always be a defined 1 if the
19290 condition is 'AL'. From an execution semantics point of view
19291 this is irrelevant since we're merely duplicating part of the
19292 behaviour of the helper. But it makes it clear to Memcheck,
19293 in this case, that condT does not in fact depend on the
19294 contents of the condition code thunk. Without it, we get
19295 quite a lot of false errors.
19297 So, just to clarify: from a straight semantics point of view,
19298 we can simply do "assign(condT, mkexpr(condT1))", and the
19299 simulator still runs fine. It's just that we get loads of
19300 false errors from Memcheck. */
19301 condT = newTemp(Ity_I32);
19302 assign(condT, IRExpr_ITE(
19303 binop(Iop_CmpNE32, binop(Iop_And32,
19304 mkexpr(old_itstate),
19305 mkU32(0xF0)),
19306 mkU32(0)),
19307 mkexpr(condT1),
19308 mkU32(1)
19311 /* Something we don't have in ARM: generate a 0 or 1 value
19312 indicating whether or not we are in an IT block (NB: 0 = in
19313 IT block, 1 = not in IT block). This is used to gate
19314 condition code updates in 16-bit Thumb instructions. */
19315 IRTemp notInITt = newTemp(Ity_I32);
19316 assign(notInITt,
19317 binop(Iop_Xor32,
19318 binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
19319 mkU32(1)));
19321 /* Compute 'condT && notInITt' -- that is, the instruction is
19322 going to execute, and we're not in an IT block. This is the
19323 gating condition for updating condition codes in 16-bit Thumb
19324 instructions, except for CMP, CMN and TST. */
19325 cond_AND_notInIT_T = newTemp(Ity_I32);
19326 assign(cond_AND_notInIT_T,
19327 binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
19328 /* END { STANDARD PREAMBLE; } */
19332 /* At this point:
19333 * ITSTATE has been updated
19334 * condT holds the guarding condition for this instruction (0 or 1),
19335 * notInITt is 1 if we're in "normal" code, 0 if in an IT block
19336 * cond_AND_notInIT_T is the AND of the above two.
19338 If the instruction proper can't trap, then there's nothing else
19339 to do w.r.t. ITSTATE -- just go and and generate IR for the
19340 insn, taking into account the guarding condition.
19342 If, however, the instruction might trap, then we must back up
19343 ITSTATE to the old value, and re-update it after the potentially
19344 trapping IR section. A trap can happen either via a memory
19345 reference or because we need to throw SIGILL.
19347 If an instruction has a side exit, we need to be sure that any
19348 ITSTATE backup is re-updated before the side exit.
19351 /* ----------------------------------------------------------- */
19352 /* -- -- */
19353 /* -- Thumb 16-bit integer instructions -- */
19354 /* -- -- */
19355 /* -- IMPORTANT: references to insn1 or INSN1 are -- */
19356 /* -- not allowed in this section -- */
19357 /* -- -- */
19358 /* ----------------------------------------------------------- */
19360 /* 16-bit instructions inside an IT block, apart from CMP, CMN and
19361 TST, do not set the condition codes. Hence we must dynamically
19362 test for this case for every condition code update. */
19364 IROp anOp = Iop_INVALID;
19365 const HChar* anOpNm = NULL;
19367 /* ================ 16-bit 15:6 cases ================ */
19369 switch (INSN0(15,6)) {
19371 case 0x10a: // CMP
19372 case 0x10b: { // CMN
19373 /* ---------------- CMP Rn, Rm ---------------- */
19374 Bool isCMN = INSN0(15,6) == 0x10b;
19375 UInt rN = INSN0(2,0);
19376 UInt rM = INSN0(5,3);
19377 IRTemp argL = newTemp(Ity_I32);
19378 IRTemp argR = newTemp(Ity_I32);
19379 assign( argL, getIRegT(rN) );
19380 assign( argR, getIRegT(rM) );
19381 /* Update flags regardless of whether in an IT block or not. */
19382 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
19383 argL, argR, condT );
19384 DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
19385 goto decode_success;
19388 case 0x108: {
19389 /* ---------------- TST Rn, Rm ---------------- */
19390 UInt rN = INSN0(2,0);
19391 UInt rM = INSN0(5,3);
19392 IRTemp oldC = newTemp(Ity_I32);
19393 IRTemp oldV = newTemp(Ity_I32);
19394 IRTemp res = newTemp(Ity_I32);
19395 assign( oldC, mk_armg_calculate_flag_c() );
19396 assign( oldV, mk_armg_calculate_flag_v() );
19397 assign( res, binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
19398 /* Update flags regardless of whether in an IT block or not. */
19399 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
19400 DIP("tst r%u, r%u\n", rN, rM);
19401 goto decode_success;
19404 case 0x109: {
19405 /* ---------------- NEGS Rd, Rm ---------------- */
19406 /* Rd = -Rm */
19407 UInt rM = INSN0(5,3);
19408 UInt rD = INSN0(2,0);
19409 IRTemp arg = newTemp(Ity_I32);
19410 IRTemp zero = newTemp(Ity_I32);
19411 assign(arg, getIRegT(rM));
19412 assign(zero, mkU32(0));
19413 // rD can never be r15
19414 putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
19415 setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
19416 DIP("negs r%u, r%u\n", rD, rM);
19417 goto decode_success;
19420 case 0x10F: {
19421 /* ---------------- MVNS Rd, Rm ---------------- */
19422 /* Rd = ~Rm */
19423 UInt rM = INSN0(5,3);
19424 UInt rD = INSN0(2,0);
19425 IRTemp oldV = newTemp(Ity_I32);
19426 IRTemp oldC = newTemp(Ity_I32);
19427 IRTemp res = newTemp(Ity_I32);
19428 assign( oldV, mk_armg_calculate_flag_v() );
19429 assign( oldC, mk_armg_calculate_flag_c() );
19430 assign(res, unop(Iop_Not32, getIRegT(rM)));
19431 // rD can never be r15
19432 putIRegT(rD, mkexpr(res), condT);
19433 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19434 cond_AND_notInIT_T );
19435 DIP("mvns r%u, r%u\n", rD, rM);
19436 goto decode_success;
19439 case 0x10C:
19440 /* ---------------- ORRS Rd, Rm ---------------- */
19441 anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
19442 case 0x100:
19443 /* ---------------- ANDS Rd, Rm ---------------- */
19444 anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
19445 case 0x101:
19446 /* ---------------- EORS Rd, Rm ---------------- */
19447 anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
19448 case 0x10d:
19449 /* ---------------- MULS Rd, Rm ---------------- */
19450 anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
19451 and_orr_eor_mul: {
19452 /* Rd = Rd `op` Rm */
19453 UInt rM = INSN0(5,3);
19454 UInt rD = INSN0(2,0);
19455 IRTemp res = newTemp(Ity_I32);
19456 IRTemp oldV = newTemp(Ity_I32);
19457 IRTemp oldC = newTemp(Ity_I32);
19458 assign( oldV, mk_armg_calculate_flag_v() );
19459 assign( oldC, mk_armg_calculate_flag_c() );
19460 assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
19461 // not safe to read guest state after here
19462 // rD can never be r15
19463 putIRegT(rD, mkexpr(res), condT);
19464 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19465 cond_AND_notInIT_T );
19466 DIP("%s r%u, r%u\n", anOpNm, rD, rM);
19467 goto decode_success;
19470 case 0x10E: {
19471 /* ---------------- BICS Rd, Rm ---------------- */
19472 /* Rd = Rd & ~Rm */
19473 UInt rM = INSN0(5,3);
19474 UInt rD = INSN0(2,0);
19475 IRTemp res = newTemp(Ity_I32);
19476 IRTemp oldV = newTemp(Ity_I32);
19477 IRTemp oldC = newTemp(Ity_I32);
19478 assign( oldV, mk_armg_calculate_flag_v() );
19479 assign( oldC, mk_armg_calculate_flag_c() );
19480 assign( res, binop(Iop_And32, getIRegT(rD),
19481 unop(Iop_Not32, getIRegT(rM) )));
19482 // not safe to read guest state after here
19483 // rD can never be r15
19484 putIRegT(rD, mkexpr(res), condT);
19485 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19486 cond_AND_notInIT_T );
19487 DIP("bics r%u, r%u\n", rD, rM);
19488 goto decode_success;
19491 case 0x105: {
19492 /* ---------------- ADCS Rd, Rm ---------------- */
19493 /* Rd = Rd + Rm + oldC */
19494 UInt rM = INSN0(5,3);
19495 UInt rD = INSN0(2,0);
19496 IRTemp argL = newTemp(Ity_I32);
19497 IRTemp argR = newTemp(Ity_I32);
19498 IRTemp oldC = newTemp(Ity_I32);
19499 IRTemp res = newTemp(Ity_I32);
19500 assign(argL, getIRegT(rD));
19501 assign(argR, getIRegT(rM));
19502 assign(oldC, mk_armg_calculate_flag_c());
19503 assign(res, binop(Iop_Add32,
19504 binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19505 mkexpr(oldC)));
19506 // rD can never be r15
19507 putIRegT(rD, mkexpr(res), condT);
19508 setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
19509 cond_AND_notInIT_T );
19510 DIP("adcs r%u, r%u\n", rD, rM);
19511 goto decode_success;
19514 case 0x106: {
19515 /* ---------------- SBCS Rd, Rm ---------------- */
19516 /* Rd = Rd - Rm - (oldC ^ 1) */
19517 UInt rM = INSN0(5,3);
19518 UInt rD = INSN0(2,0);
19519 IRTemp argL = newTemp(Ity_I32);
19520 IRTemp argR = newTemp(Ity_I32);
19521 IRTemp oldC = newTemp(Ity_I32);
19522 IRTemp res = newTemp(Ity_I32);
19523 assign(argL, getIRegT(rD));
19524 assign(argR, getIRegT(rM));
19525 assign(oldC, mk_armg_calculate_flag_c());
19526 assign(res, binop(Iop_Sub32,
19527 binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19528 binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
19529 // rD can never be r15
19530 putIRegT(rD, mkexpr(res), condT);
19531 setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
19532 cond_AND_notInIT_T );
19533 DIP("sbcs r%u, r%u\n", rD, rM);
19534 goto decode_success;
19537 case 0x2CB: {
19538 /* ---------------- UXTB Rd, Rm ---------------- */
19539 /* Rd = 8Uto32(Rm) */
19540 UInt rM = INSN0(5,3);
19541 UInt rD = INSN0(2,0);
19542 putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
19543 condT);
19544 DIP("uxtb r%u, r%u\n", rD, rM);
19545 goto decode_success;
19548 case 0x2C9: {
19549 /* ---------------- SXTB Rd, Rm ---------------- */
19550 /* Rd = 8Sto32(Rm) */
19551 UInt rM = INSN0(5,3);
19552 UInt rD = INSN0(2,0);
19553 putIRegT(rD, binop(Iop_Sar32,
19554 binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
19555 mkU8(24)),
19556 condT);
19557 DIP("sxtb r%u, r%u\n", rD, rM);
19558 goto decode_success;
19561 case 0x2CA: {
19562 /* ---------------- UXTH Rd, Rm ---------------- */
19563 /* Rd = 16Uto32(Rm) */
19564 UInt rM = INSN0(5,3);
19565 UInt rD = INSN0(2,0);
19566 putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
19567 condT);
19568 DIP("uxth r%u, r%u\n", rD, rM);
19569 goto decode_success;
19572 case 0x2C8: {
19573 /* ---------------- SXTH Rd, Rm ---------------- */
19574 /* Rd = 16Sto32(Rm) */
19575 UInt rM = INSN0(5,3);
19576 UInt rD = INSN0(2,0);
19577 putIRegT(rD, binop(Iop_Sar32,
19578 binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
19579 mkU8(16)),
19580 condT);
19581 DIP("sxth r%u, r%u\n", rD, rM);
19582 goto decode_success;
19585 case 0x102: // LSLS
19586 case 0x103: // LSRS
19587 case 0x104: // ASRS
19588 case 0x107: { // RORS
19589 /* ---------------- LSLS Rs, Rd ---------------- */
19590 /* ---------------- LSRS Rs, Rd ---------------- */
19591 /* ---------------- ASRS Rs, Rd ---------------- */
19592 /* ---------------- RORS Rs, Rd ---------------- */
19593 /* Rd = Rd `op` Rs, and set flags */
19594 UInt rS = INSN0(5,3);
19595 UInt rD = INSN0(2,0);
19596 IRTemp oldV = newTemp(Ity_I32);
19597 IRTemp rDt = newTemp(Ity_I32);
19598 IRTemp rSt = newTemp(Ity_I32);
19599 IRTemp res = newTemp(Ity_I32);
19600 IRTemp resC = newTemp(Ity_I32);
19601 const HChar* wot = "???";
19602 assign(rSt, getIRegT(rS));
19603 assign(rDt, getIRegT(rD));
19604 assign(oldV, mk_armg_calculate_flag_v());
19605 /* Does not appear to be the standard 'how' encoding. */
19606 switch (INSN0(15,6)) {
19607 case 0x102:
19608 compute_result_and_C_after_LSL_by_reg(
19609 dis_buf, &res, &resC, rDt, rSt, rD, rS
19611 wot = "lsl";
19612 break;
19613 case 0x103:
19614 compute_result_and_C_after_LSR_by_reg(
19615 dis_buf, &res, &resC, rDt, rSt, rD, rS
19617 wot = "lsr";
19618 break;
19619 case 0x104:
19620 compute_result_and_C_after_ASR_by_reg(
19621 dis_buf, &res, &resC, rDt, rSt, rD, rS
19623 wot = "asr";
19624 break;
19625 case 0x107:
19626 compute_result_and_C_after_ROR_by_reg(
19627 dis_buf, &res, &resC, rDt, rSt, rD, rS
19629 wot = "ror";
19630 break;
19631 default:
19632 /*NOTREACHED*/vassert(0);
19634 // not safe to read guest state after this point
19635 putIRegT(rD, mkexpr(res), condT);
19636 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
19637 cond_AND_notInIT_T );
19638 DIP("%ss r%u, r%u\n", wot, rS, rD);
19639 goto decode_success;
19642 case 0x2E8: // REV
19643 case 0x2E9: { // REV16
19644 /* ---------------- REV Rd, Rm ---------------- */
19645 /* ---------------- REV16 Rd, Rm ---------------- */
19646 UInt rM = INSN0(5,3);
19647 UInt rD = INSN0(2,0);
19648 Bool isREV = INSN0(15,6) == 0x2E8;
19649 IRTemp arg = newTemp(Ity_I32);
19650 assign(arg, getIRegT(rM));
19651 IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
19652 putIRegT(rD, mkexpr(res), condT);
19653 DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
19654 goto decode_success;
19657 case 0x2EB: { // REVSH
19658 /* ---------------- REVSH Rd, Rn ---------------- */
19659 UInt rM = INSN0(5,3);
19660 UInt rD = INSN0(2,0);
19661 IRTemp irt_rM = newTemp(Ity_I32);
19662 IRTemp irt_hi = newTemp(Ity_I32);
19663 IRTemp irt_low = newTemp(Ity_I32);
19664 IRTemp irt_res = newTemp(Ity_I32);
19665 assign(irt_rM, getIRegT(rM));
19666 assign(irt_hi,
19667 binop(Iop_Sar32,
19668 binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
19669 mkU8(16)
19672 assign(irt_low,
19673 binop(Iop_And32,
19674 binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
19675 mkU32(0xFF)
19678 assign(irt_res,
19679 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
19681 putIRegT(rD, mkexpr(irt_res), condT);
19682 DIP("revsh r%u, r%u\n", rD, rM);
19683 goto decode_success;
19686 default:
19687 break; /* examine the next shortest prefix */
19692 /* ================ 16-bit 15:7 cases ================ */
19694 switch (INSN0(15,7)) {
19696 case BITS9(1,0,1,1,0,0,0,0,0): {
19697 /* ------------ ADD SP, #imm7 * 4 ------------ */
19698 UInt uimm7 = INSN0(6,0);
19699 putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
19700 condT);
19701 DIP("add sp, #%u\n", uimm7 * 4);
19702 goto decode_success;
19705 case BITS9(1,0,1,1,0,0,0,0,1): {
19706 /* ------------ SUB SP, #imm7 * 4 ------------ */
19707 UInt uimm7 = INSN0(6,0);
19708 putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
19709 condT);
19710 DIP("sub sp, #%u\n", uimm7 * 4);
19711 goto decode_success;
19714 case BITS9(0,1,0,0,0,1,1,1,0): {
19715 /* ---------------- BX rM ---------------- */
19716 /* Branch to reg, and optionally switch modes. Reg contains a
19717 suitably encoded address therefore (w CPSR.T at the bottom).
19718 Have to special-case r15, as usual. */
19719 UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
19720 if (BITS3(0,0,0) == INSN0(2,0)) {
19721 IRTemp dst = newTemp(Ity_I32);
19722 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19723 mk_skip_over_T16_if_cond_is_false(condT);
19724 condT = IRTemp_INVALID;
19725 // now uncond
19726 if (rM <= 14) {
19727 assign( dst, getIRegT(rM) );
19728 } else {
19729 vassert(rM == 15);
19730 assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
19732 llPutIReg(15, mkexpr(dst));
19733 dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
19734 dres.whatNext = Dis_StopHere;
19735 DIP("bx r%u (possibly switch to ARM mode)\n", rM);
19736 goto decode_success;
19738 break;
19741 /* ---------------- BLX rM ---------------- */
19742 /* Branch and link to interworking address in rM. */
19743 case BITS9(0,1,0,0,0,1,1,1,1): {
19744 if (BITS3(0,0,0) == INSN0(2,0)) {
19745 UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
19746 IRTemp dst = newTemp(Ity_I32);
19747 if (rM <= 14) {
19748 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19749 mk_skip_over_T16_if_cond_is_false(condT);
19750 condT = IRTemp_INVALID;
19751 // now uncond
19752 /* We're returning to Thumb code, hence "| 1" */
19753 assign( dst, getIRegT(rM) );
19754 putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
19755 IRTemp_INVALID );
19756 llPutIReg(15, mkexpr(dst));
19757 dres.jk_StopHere = Ijk_Call;
19758 dres.whatNext = Dis_StopHere;
19759 DIP("blx r%u (possibly switch to ARM mode)\n", rM);
19760 goto decode_success;
19762 /* else unpredictable, fall through */
19764 break;
19767 default:
19768 break; /* examine the next shortest prefix */
19773 /* ================ 16-bit 15:8 cases ================ */
19775 switch (INSN0(15,8)) {
19777 case BITS8(1,1,0,1,1,1,1,1): {
19778 /* ---------------- SVC ---------------- */
19779 UInt imm8 = INSN0(7,0);
19780 if (imm8 == 0) {
19781 /* A syscall. We can't do this conditionally, hence: */
19782 mk_skip_over_T16_if_cond_is_false( condT );
19783 // FIXME: what if we have to back up and restart this insn?
19784 // then ITSTATE will be wrong (we'll have it as "used")
19785 // when it isn't. Correct is to save ITSTATE in a
19786 // stash pseudo-reg, and back up from that if we have to
19787 // restart.
19788 // uncond after here
19789 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
19790 dres.jk_StopHere = Ijk_Sys_syscall;
19791 dres.whatNext = Dis_StopHere;
19792 DIP("svc #0x%08x\n", imm8);
19793 goto decode_success;
19795 /* else fall through */
19796 break;
19799 case BITS8(0,1,0,0,0,1,0,0): {
19800 /* ---------------- ADD(HI) Rd, Rm ---------------- */
19801 UInt h1 = INSN0(7,7);
19802 UInt h2 = INSN0(6,6);
19803 UInt rM = (h2 << 3) | INSN0(5,3);
19804 UInt rD = (h1 << 3) | INSN0(2,0);
19805 //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
19806 if (rD == 15 && rM == 15) {
19807 // then it's invalid
19808 } else {
19809 IRTemp res = newTemp(Ity_I32);
19810 assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
19811 if (rD != 15) {
19812 putIRegT( rD, mkexpr(res), condT );
19813 } else {
19814 /* Only allowed outside or last-in IT block; SIGILL if not so. */
19815 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19816 /* jump over insn if not selected */
19817 mk_skip_over_T16_if_cond_is_false(condT);
19818 condT = IRTemp_INVALID;
19819 // now uncond
19820 /* non-interworking branch */
19821 llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
19822 dres.jk_StopHere = Ijk_Boring;
19823 dres.whatNext = Dis_StopHere;
19825 DIP("add(hi) r%u, r%u\n", rD, rM);
19826 goto decode_success;
19828 break;
19831 case BITS8(0,1,0,0,0,1,0,1): {
19832 /* ---------------- CMP(HI) Rd, Rm ---------------- */
19833 UInt h1 = INSN0(7,7);
19834 UInt h2 = INSN0(6,6);
19835 UInt rM = (h2 << 3) | INSN0(5,3);
19836 UInt rN = (h1 << 3) | INSN0(2,0);
19837 if (h1 != 0 || h2 != 0) {
19838 IRTemp argL = newTemp(Ity_I32);
19839 IRTemp argR = newTemp(Ity_I32);
19840 assign( argL, getIRegT(rN) );
19841 assign( argR, getIRegT(rM) );
19842 /* Update flags regardless of whether in an IT block or not. */
19843 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19844 DIP("cmphi r%u, r%u\n", rN, rM);
19845 goto decode_success;
19847 break;
19850 case BITS8(0,1,0,0,0,1,1,0): {
19851 /* ---------------- MOV(HI) Rd, Rm ---------------- */
19852 UInt h1 = INSN0(7,7);
19853 UInt h2 = INSN0(6,6);
19854 UInt rM = (h2 << 3) | INSN0(5,3);
19855 UInt rD = (h1 << 3) | INSN0(2,0);
19856 /* The old ARM ARM seems to disallow the case where both Rd and
19857 Rm are "low" registers, but newer versions allow it. */
19858 if (1 /*h1 != 0 || h2 != 0*/) {
19859 IRTemp val = newTemp(Ity_I32);
19860 assign( val, getIRegT(rM) );
19861 if (rD != 15) {
19862 putIRegT( rD, mkexpr(val), condT );
19863 } else {
19864 /* Only allowed outside or last-in IT block; SIGILL if not so. */
19865 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19866 /* jump over insn if not selected */
19867 mk_skip_over_T16_if_cond_is_false(condT);
19868 condT = IRTemp_INVALID;
19869 // now uncond
19870 /* non-interworking branch */
19871 llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
19872 dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
19873 dres.whatNext = Dis_StopHere;
19875 DIP("mov r%u, r%u\n", rD, rM);
19876 goto decode_success;
19878 break;
19881 case BITS8(1,0,1,1,1,1,1,1): {
19882 /* ---------------- IT (if-then) ---------------- */
19883 UInt firstcond = INSN0(7,4);
19884 UInt mask = INSN0(3,0);
19885 UInt newITSTATE = 0;
19886 /* This is the ITSTATE represented as described in
19887 libvex_guest_arm.h. It is not the ARM ARM representation. */
19888 HChar c1 = '.';
19889 HChar c2 = '.';
19890 HChar c3 = '.';
19891 Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
19892 firstcond, mask );
19893 if (valid && firstcond != 0xF/*NV*/) {
19894 /* Not allowed in an IT block; SIGILL if so. */
19895 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19897 IRTemp t = newTemp(Ity_I32);
19898 assign(t, mkU32(newITSTATE));
19899 put_ITSTATE(t);
19901 DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
19902 goto decode_success;
19904 break;
19907 case BITS8(1,0,1,1,0,0,0,1):
19908 case BITS8(1,0,1,1,0,0,1,1):
19909 case BITS8(1,0,1,1,1,0,0,1):
19910 case BITS8(1,0,1,1,1,0,1,1): {
19911 /* ---------------- CB{N}Z ---------------- */
19912 UInt rN = INSN0(2,0);
19913 UInt bOP = INSN0(11,11);
19914 UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
19915 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19916 /* It's a conditional branch forward. */
19917 IRTemp kond = newTemp(Ity_I1);
19918 assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
19919 getIRegT(rN), mkU32(0)) );
19921 vassert(0 == (guest_R15_curr_instr_notENC & 1));
19922 /* Looks like the nearest insn we can branch to is the one after
19923 next. That makes sense, as there's no point in being able to
19924 encode a conditional branch to the next instruction. */
19925 UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
19926 stmt(IRStmt_Exit( mkexpr(kond),
19927 Ijk_Boring,
19928 IRConst_U32(toUInt(dst)),
19929 OFFB_R15T ));
19930 DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
19931 goto decode_success;
19934 default:
19935 break; /* examine the next shortest prefix */
19940 /* ================ 16-bit 15:9 cases ================ */
19942 switch (INSN0(15,9)) {
19944 case BITS7(1,0,1,1,0,1,0): {
19945 /* ---------------- PUSH ---------------- */
19946 /* This is a bit like STMxx, but way simpler. Complications we
19947 don't have to deal with:
19948 * SP being one of the transferred registers
19949 * direction (increment vs decrement)
19950 * before-vs-after-ness
19952 Int i, nRegs;
19953 UInt bitR = INSN0(8,8);
19954 UInt regList = INSN0(7,0);
19955 if (bitR) regList |= (1 << 14);
19957 /* At least one register must be transferred, else result is
19958 UNPREDICTABLE. */
19959 if (regList != 0) {
19960 /* Since we can't generate a guaranteed non-trapping IR
19961 sequence, (1) jump over the insn if it is gated false, and
19962 (2) back out the ITSTATE update. */
19963 mk_skip_over_T16_if_cond_is_false(condT);
19964 condT = IRTemp_INVALID;
19965 put_ITSTATE(old_itstate);
19966 // now uncond
19968 nRegs = 0;
19969 for (i = 0; i < 16; i++) {
19970 if ((regList & (1 << i)) != 0)
19971 nRegs++;
19973 vassert(nRegs >= 1 && nRegs <= 9);
19975 /* Move SP down first of all, so we're "covered". And don't
19976 mess with its alignment. */
19977 IRTemp newSP = newTemp(Ity_I32);
19978 assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
19979 putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
19981 /* Generate a transfer base address as a forced-aligned
19982 version of the final SP value. */
19983 IRTemp base = newTemp(Ity_I32);
19984 assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
19986 /* Now the transfers */
19987 nRegs = 0;
19988 for (i = 0; i < 16; i++) {
19989 if ((regList & (1 << i)) != 0) {
19990 storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
19991 getIRegT(i) );
19992 nRegs++;
19996 /* Reinstate the ITSTATE update. */
19997 put_ITSTATE(new_itstate);
19999 DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
20000 goto decode_success;
20002 break;
20005 case BITS7(1,0,1,1,1,1,0): {
20006 /* ---------------- POP ---------------- */
20007 Int i, nRegs;
20008 UInt bitR = INSN0(8,8);
20009 UInt regList = INSN0(7,0);
20011 /* At least one register must be transferred, else result is
20012 UNPREDICTABLE. */
20013 if (regList != 0 || bitR) {
20014 /* Since we can't generate a guaranteed non-trapping IR
20015 sequence, (1) jump over the insn if it is gated false, and
20016 (2) back out the ITSTATE update. */
20017 mk_skip_over_T16_if_cond_is_false(condT);
20018 condT = IRTemp_INVALID;
20019 put_ITSTATE(old_itstate);
20020 // now uncond
20022 nRegs = 0;
20023 for (i = 0; i < 8; i++) {
20024 if ((regList & (1 << i)) != 0)
20025 nRegs++;
20027 vassert(nRegs >= 0 && nRegs <= 8);
20028 vassert(bitR == 0 || bitR == 1);
20030 IRTemp oldSP = newTemp(Ity_I32);
20031 assign(oldSP, getIRegT(13));
20033 /* Generate a transfer base address as a forced-aligned
20034 version of the original SP value. */
20035 IRTemp base = newTemp(Ity_I32);
20036 assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
20038 /* Compute a new value for SP, but don't install it yet, so
20039 that we're "covered" until all the transfers are done.
20040 And don't mess with its alignment. */
20041 IRTemp newSP = newTemp(Ity_I32);
20042 assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
20043 mkU32(4 * (nRegs + bitR))));
20045 /* Now the transfers, not including PC */
20046 nRegs = 0;
20047 for (i = 0; i < 8; i++) {
20048 if ((regList & (1 << i)) != 0) {
20049 putIRegT(i, loadLE( Ity_I32,
20050 binop(Iop_Add32, mkexpr(base),
20051 mkU32(4 * nRegs))),
20052 IRTemp_INVALID );
20053 nRegs++;
20057 IRTemp newPC = IRTemp_INVALID;
20058 if (bitR) {
20059 newPC = newTemp(Ity_I32);
20060 assign( newPC, loadLE( Ity_I32,
20061 binop(Iop_Add32, mkexpr(base),
20062 mkU32(4 * nRegs))));
20065 /* Now we can safely install the new SP value */
20066 putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
20068 /* Reinstate the ITSTATE update. */
20069 put_ITSTATE(new_itstate);
20071 /* now, do we also have to do a branch? If so, it turns out
20072 that the new PC value is encoded exactly as we need it to
20073 be -- with CPSR.T in the bottom bit. So we can simply use
20074 it as is, no need to mess with it. Note, therefore, this
20075 is an interworking return. */
20076 if (bitR) {
20077 llPutIReg(15, mkexpr(newPC));
20078 dres.jk_StopHere = Ijk_Ret;
20079 dres.whatNext = Dis_StopHere;
20082 DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
20083 goto decode_success;
20085 break;
20088 case BITS7(0,0,0,1,1,1,0): /* ADDS */
20089 case BITS7(0,0,0,1,1,1,1): { /* SUBS */
20090 /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
20091 /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
20092 UInt uimm3 = INSN0(8,6);
20093 UInt rN = INSN0(5,3);
20094 UInt rD = INSN0(2,0);
20095 UInt isSub = INSN0(9,9);
20096 IRTemp argL = newTemp(Ity_I32);
20097 IRTemp argR = newTemp(Ity_I32);
20098 assign( argL, getIRegT(rN) );
20099 assign( argR, mkU32(uimm3) );
20100 putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
20101 mkexpr(argL), mkexpr(argR)),
20102 condT);
20103 setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20104 argL, argR, cond_AND_notInIT_T );
20105 DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
20106 goto decode_success;
20109 case BITS7(0,0,0,1,1,0,0): /* ADDS */
20110 case BITS7(0,0,0,1,1,0,1): { /* SUBS */
20111 /* ---------------- ADDS Rd, Rn, Rm ---------------- */
20112 /* ---------------- SUBS Rd, Rn, Rm ---------------- */
20113 UInt rM = INSN0(8,6);
20114 UInt rN = INSN0(5,3);
20115 UInt rD = INSN0(2,0);
20116 UInt isSub = INSN0(9,9);
20117 IRTemp argL = newTemp(Ity_I32);
20118 IRTemp argR = newTemp(Ity_I32);
20119 assign( argL, getIRegT(rN) );
20120 assign( argR, getIRegT(rM) );
20121 putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
20122 mkexpr(argL), mkexpr(argR)),
20123 condT );
20124 setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20125 argL, argR, cond_AND_notInIT_T );
20126 DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
20127 goto decode_success;
20130 case BITS7(0,1,0,1,0,0,0): /* STR */
20131 case BITS7(0,1,0,1,1,0,0): { /* LDR */
20132 /* ------------- LDR Rd, [Rn, Rm] ------------- */
20133 /* ------------- STR Rd, [Rn, Rm] ------------- */
20134 /* LDR/STR Rd, [Rn + Rm] */
20135 UInt rD = INSN0(2,0);
20136 UInt rN = INSN0(5,3);
20137 UInt rM = INSN0(8,6);
20138 UInt isLD = INSN0(11,11);
20140 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20141 put_ITSTATE(old_itstate); // backout
20142 if (isLD) {
20143 IRTemp tD = newTemp(Ity_I32);
20144 loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20145 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20146 } else {
20147 storeGuardedLE(ea, getIRegT(rD), condT);
20149 put_ITSTATE(new_itstate); // restore
20151 DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20152 goto decode_success;
20155 case BITS7(0,1,0,1,0,0,1):
20156 case BITS7(0,1,0,1,1,0,1): {
20157 /* ------------- LDRH Rd, [Rn, Rm] ------------- */
20158 /* ------------- STRH Rd, [Rn, Rm] ------------- */
20159 /* LDRH/STRH Rd, [Rn + Rm] */
20160 UInt rD = INSN0(2,0);
20161 UInt rN = INSN0(5,3);
20162 UInt rM = INSN0(8,6);
20163 UInt isLD = INSN0(11,11);
20165 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20166 put_ITSTATE(old_itstate); // backout
20167 if (isLD) {
20168 IRTemp tD = newTemp(Ity_I32);
20169 loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
20170 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20171 } else {
20172 storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
20174 put_ITSTATE(new_itstate); // restore
20176 DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20177 goto decode_success;
20180 case BITS7(0,1,0,1,1,1,1): {
20181 /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
20182 /* LDRSH Rd, [Rn + Rm] */
20183 UInt rD = INSN0(2,0);
20184 UInt rN = INSN0(5,3);
20185 UInt rM = INSN0(8,6);
20187 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20188 put_ITSTATE(old_itstate); // backout
20189 IRTemp tD = newTemp(Ity_I32);
20190 loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
20191 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20192 put_ITSTATE(new_itstate); // restore
20194 DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
20195 goto decode_success;
20198 case BITS7(0,1,0,1,0,1,1): {
20199 /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
20200 /* LDRSB Rd, [Rn + Rm] */
20201 UInt rD = INSN0(2,0);
20202 UInt rN = INSN0(5,3);
20203 UInt rM = INSN0(8,6);
20205 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20206 put_ITSTATE(old_itstate); // backout
20207 IRTemp tD = newTemp(Ity_I32);
20208 loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
20209 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20210 put_ITSTATE(new_itstate); // restore
20212 DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
20213 goto decode_success;
20216 case BITS7(0,1,0,1,0,1,0):
20217 case BITS7(0,1,0,1,1,1,0): {
20218 /* ------------- LDRB Rd, [Rn, Rm] ------------- */
20219 /* ------------- STRB Rd, [Rn, Rm] ------------- */
20220 /* LDRB/STRB Rd, [Rn + Rm] */
20221 UInt rD = INSN0(2,0);
20222 UInt rN = INSN0(5,3);
20223 UInt rM = INSN0(8,6);
20224 UInt isLD = INSN0(11,11);
20226 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20227 put_ITSTATE(old_itstate); // backout
20228 if (isLD) {
20229 IRTemp tD = newTemp(Ity_I32);
20230 loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
20231 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20232 } else {
20233 storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
20235 put_ITSTATE(new_itstate); // restore
20237 DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20238 goto decode_success;
20241 default:
20242 break; /* examine the next shortest prefix */
20247 /* ================ 16-bit 15:11 cases ================ */
20249 switch (INSN0(15,11)) {
20251 case BITS5(0,0,1,1,0):
20252 case BITS5(0,0,1,1,1): {
20253 /* ---------------- ADDS Rn, #uimm8 ---------------- */
20254 /* ---------------- SUBS Rn, #uimm8 ---------------- */
20255 UInt isSub = INSN0(11,11);
20256 UInt rN = INSN0(10,8);
20257 UInt uimm8 = INSN0(7,0);
20258 IRTemp argL = newTemp(Ity_I32);
20259 IRTemp argR = newTemp(Ity_I32);
20260 assign( argL, getIRegT(rN) );
20261 assign( argR, mkU32(uimm8) );
20262 putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
20263 mkexpr(argL), mkexpr(argR)), condT );
20264 setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20265 argL, argR, cond_AND_notInIT_T );
20266 DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
20267 goto decode_success;
20270 case BITS5(1,0,1,0,0): {
20271 /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
20272 /* a.k.a. ADR */
20273 /* rD = align4(PC) + imm8 * 4 */
20274 UInt rD = INSN0(10,8);
20275 UInt imm8 = INSN0(7,0);
20276 putIRegT(rD, binop(Iop_Add32,
20277 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20278 mkU32(imm8 * 4)),
20279 condT);
20280 DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
20281 goto decode_success;
20284 case BITS5(1,0,1,0,1): {
20285 /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
20286 UInt rD = INSN0(10,8);
20287 UInt imm8 = INSN0(7,0);
20288 putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
20289 condT);
20290 DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
20291 goto decode_success;
20294 case BITS5(0,0,1,0,1): {
20295 /* ---------------- CMP Rn, #uimm8 ---------------- */
20296 UInt rN = INSN0(10,8);
20297 UInt uimm8 = INSN0(7,0);
20298 IRTemp argL = newTemp(Ity_I32);
20299 IRTemp argR = newTemp(Ity_I32);
20300 assign( argL, getIRegT(rN) );
20301 assign( argR, mkU32(uimm8) );
20302 /* Update flags regardless of whether in an IT block or not. */
20303 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
20304 DIP("cmp r%u, #%u\n", rN, uimm8);
20305 goto decode_success;
20308 case BITS5(0,0,1,0,0): {
20309 /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
20310 UInt rD = INSN0(10,8);
20311 UInt uimm8 = INSN0(7,0);
20312 IRTemp oldV = newTemp(Ity_I32);
20313 IRTemp oldC = newTemp(Ity_I32);
20314 IRTemp res = newTemp(Ity_I32);
20315 assign( oldV, mk_armg_calculate_flag_v() );
20316 assign( oldC, mk_armg_calculate_flag_c() );
20317 assign( res, mkU32(uimm8) );
20318 putIRegT(rD, mkexpr(res), condT);
20319 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
20320 cond_AND_notInIT_T );
20321 DIP("movs r%u, #%u\n", rD, uimm8);
20322 goto decode_success;
20325 case BITS5(0,1,0,0,1): {
20326 /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
20327 /* LDR Rd, [align4(PC) + imm8 * 4] */
20328 UInt rD = INSN0(10,8);
20329 UInt imm8 = INSN0(7,0);
20330 IRTemp ea = newTemp(Ity_I32);
20332 assign(ea, binop(Iop_Add32,
20333 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20334 mkU32(imm8 * 4)));
20335 put_ITSTATE(old_itstate); // backout
20336 IRTemp tD = newTemp(Ity_I32);
20337 loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
20338 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20339 put_ITSTATE(new_itstate); // restore
20341 DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
20342 goto decode_success;
20345 case BITS5(0,1,1,0,0): /* STR */
20346 case BITS5(0,1,1,0,1): { /* LDR */
20347 /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
20348 /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
20349 /* LDR/STR Rd, [Rn + imm5 * 4] */
20350 UInt rD = INSN0(2,0);
20351 UInt rN = INSN0(5,3);
20352 UInt imm5 = INSN0(10,6);
20353 UInt isLD = INSN0(11,11);
20355 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
20356 put_ITSTATE(old_itstate); // backout
20357 if (isLD) {
20358 IRTemp tD = newTemp(Ity_I32);
20359 loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20360 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20361 } else {
20362 storeGuardedLE( ea, getIRegT(rD), condT );
20364 put_ITSTATE(new_itstate); // restore
20366 DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
20367 goto decode_success;
20370 case BITS5(1,0,0,0,0): /* STRH */
20371 case BITS5(1,0,0,0,1): { /* LDRH */
20372 /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
20373 /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
20374 /* LDRH/STRH Rd, [Rn + imm5 * 2] */
20375 UInt rD = INSN0(2,0);
20376 UInt rN = INSN0(5,3);
20377 UInt imm5 = INSN0(10,6);
20378 UInt isLD = INSN0(11,11);
20380 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
20381 put_ITSTATE(old_itstate); // backout
20382 if (isLD) {
20383 IRTemp tD = newTemp(Ity_I32);
20384 loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
20385 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20386 } else {
20387 storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
20389 put_ITSTATE(new_itstate); // restore
20391 DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
20392 goto decode_success;
20395 case BITS5(0,1,1,1,0): /* STRB */
20396 case BITS5(0,1,1,1,1): { /* LDRB */
20397 /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
20398 /* ------------- STRB Rd, [Rn, #imm5] ------------- */
20399 /* LDRB/STRB Rd, [Rn + imm5] */
20400 UInt rD = INSN0(2,0);
20401 UInt rN = INSN0(5,3);
20402 UInt imm5 = INSN0(10,6);
20403 UInt isLD = INSN0(11,11);
20405 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
20406 put_ITSTATE(old_itstate); // backout
20407 if (isLD) {
20408 IRTemp tD = newTemp(Ity_I32);
20409 loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
20410 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20411 } else {
20412 storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
20414 put_ITSTATE(new_itstate); // restore
20416 DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
20417 goto decode_success;
20420 case BITS5(1,0,0,1,0): /* STR */
20421 case BITS5(1,0,0,1,1): { /* LDR */
20422 /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
20423 /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
20424 /* LDR/STR Rd, [SP + imm8 * 4] */
20425 UInt rD = INSN0(10,8);
20426 UInt imm8 = INSN0(7,0);
20427 UInt isLD = INSN0(11,11);
20429 IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
20430 put_ITSTATE(old_itstate); // backout
20431 if (isLD) {
20432 IRTemp tD = newTemp(Ity_I32);
20433 loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20434 putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20435 } else {
20436 storeGuardedLE(ea, getIRegT(rD), condT);
20438 put_ITSTATE(new_itstate); // restore
20440 DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
20441 goto decode_success;
20444 case BITS5(1,1,0,0,1): {
20445 /* ------------- LDMIA Rn!, {reglist} ------------- */
20446 Int i, nRegs = 0;
20447 UInt rN = INSN0(10,8);
20448 UInt list = INSN0(7,0);
20449 /* Empty lists aren't allowed. */
20450 if (list != 0) {
20451 mk_skip_over_T16_if_cond_is_false(condT);
20452 condT = IRTemp_INVALID;
20453 put_ITSTATE(old_itstate);
20454 // now uncond
20456 IRTemp oldRn = newTemp(Ity_I32);
20457 IRTemp base = newTemp(Ity_I32);
20458 assign(oldRn, getIRegT(rN));
20459 assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
20460 for (i = 0; i < 8; i++) {
20461 if (0 == (list & (1 << i)))
20462 continue;
20463 nRegs++;
20464 putIRegT(
20465 i, loadLE(Ity_I32,
20466 binop(Iop_Add32, mkexpr(base),
20467 mkU32(nRegs * 4 - 4))),
20468 IRTemp_INVALID
20471 /* Only do the writeback for rN if it isn't in the list of
20472 registers to be transferred. */
20473 if (0 == (list & (1 << rN))) {
20474 putIRegT(rN,
20475 binop(Iop_Add32, mkexpr(oldRn),
20476 mkU32(nRegs * 4)),
20477 IRTemp_INVALID
20481 /* Reinstate the ITSTATE update. */
20482 put_ITSTATE(new_itstate);
20484 DIP("ldmia r%u!, {0x%04x}\n", rN, list);
20485 goto decode_success;
20487 break;
20490 case BITS5(1,1,0,0,0): {
20491 /* ------------- STMIA Rn!, {reglist} ------------- */
20492 Int i, nRegs = 0;
20493 UInt rN = INSN0(10,8);
20494 UInt list = INSN0(7,0);
20495 /* Empty lists aren't allowed. Also, if rN is in the list then
20496 it must be the lowest numbered register in the list. */
20497 Bool valid = list != 0;
20498 if (valid && 0 != (list & (1 << rN))) {
20499 for (i = 0; i < rN; i++) {
20500 if (0 != (list & (1 << i)))
20501 valid = False;
20504 if (valid) {
20505 mk_skip_over_T16_if_cond_is_false(condT);
20506 condT = IRTemp_INVALID;
20507 put_ITSTATE(old_itstate);
20508 // now uncond
20510 IRTemp oldRn = newTemp(Ity_I32);
20511 IRTemp base = newTemp(Ity_I32);
20512 assign(oldRn, getIRegT(rN));
20513 assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
20514 for (i = 0; i < 8; i++) {
20515 if (0 == (list & (1 << i)))
20516 continue;
20517 nRegs++;
20518 storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
20519 getIRegT(i) );
20521 /* Always do the writeback. */
20522 putIRegT(rN,
20523 binop(Iop_Add32, mkexpr(oldRn),
20524 mkU32(nRegs * 4)),
20525 IRTemp_INVALID);
20527 /* Reinstate the ITSTATE update. */
20528 put_ITSTATE(new_itstate);
20530 DIP("stmia r%u!, {0x%04x}\n", rN, list);
20531 goto decode_success;
20533 break;
20536 case BITS5(0,0,0,0,0): /* LSLS */
20537 case BITS5(0,0,0,0,1): /* LSRS */
20538 case BITS5(0,0,0,1,0): { /* ASRS */
20539 /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
20540 /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
20541 /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
20542 UInt rD = INSN0(2,0);
20543 UInt rM = INSN0(5,3);
20544 UInt imm5 = INSN0(10,6);
20545 IRTemp res = newTemp(Ity_I32);
20546 IRTemp resC = newTemp(Ity_I32);
20547 IRTemp rMt = newTemp(Ity_I32);
20548 IRTemp oldV = newTemp(Ity_I32);
20549 const HChar* wot = "???";
20550 assign(rMt, getIRegT(rM));
20551 assign(oldV, mk_armg_calculate_flag_v());
20552 /* Looks like INSN0(12,11) are the standard 'how' encoding.
20553 Could compactify if the ROR case later appears. */
20554 switch (INSN0(15,11)) {
20555 case BITS5(0,0,0,0,0):
20556 compute_result_and_C_after_LSL_by_imm5(
20557 dis_buf, &res, &resC, rMt, imm5, rM
20559 wot = "lsl";
20560 break;
20561 case BITS5(0,0,0,0,1):
20562 compute_result_and_C_after_LSR_by_imm5(
20563 dis_buf, &res, &resC, rMt, imm5, rM
20565 wot = "lsr";
20566 break;
20567 case BITS5(0,0,0,1,0):
20568 compute_result_and_C_after_ASR_by_imm5(
20569 dis_buf, &res, &resC, rMt, imm5, rM
20571 wot = "asr";
20572 break;
20573 default:
20574 /*NOTREACHED*/vassert(0);
20576 // not safe to read guest state after this point
20577 putIRegT(rD, mkexpr(res), condT);
20578 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
20579 cond_AND_notInIT_T );
20580 /* ignore buf and roll our own output */
20581 DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
20582 goto decode_success;
20585 case BITS5(1,1,1,0,0): {
20586 /* ---------------- B #simm11 ---------------- */
20587 UInt uimm11 = INSN0(10,0); uimm11 <<= 21;
20588 Int simm11 = (Int)uimm11; simm11 >>= 20;
20589 UInt dst = simm11 + guest_R15_curr_instr_notENC + 4;
20590 /* Only allowed outside or last-in IT block; SIGILL if not so. */
20591 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20592 // and skip this insn if not selected; being cleverer is too
20593 // difficult
20594 mk_skip_over_T16_if_cond_is_false(condT);
20595 condT = IRTemp_INVALID;
20596 // now uncond
20597 llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
20598 dres.jk_StopHere = Ijk_Boring;
20599 dres.whatNext = Dis_StopHere;
20600 DIP("b 0x%x\n", dst);
20601 goto decode_success;
20604 default:
20605 break; /* examine the next shortest prefix */
20610 /* ================ 16-bit 15:12 cases ================ */
20612 switch (INSN0(15,12)) {
20614 case BITS4(1,1,0,1): {
20615 /* ---------------- Bcond #simm8 ---------------- */
20616 UInt cond = INSN0(11,8);
20617 UInt uimm8 = INSN0(7,0); uimm8 <<= 24;
20618 Int simm8 = (Int)uimm8; simm8 >>= 23;
20619 UInt dst = simm8 + guest_R15_curr_instr_notENC + 4;
20620 if (cond != ARMCondAL && cond != ARMCondNV) {
20621 /* Not allowed in an IT block; SIGILL if so. */
20622 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
20624 IRTemp kondT = newTemp(Ity_I32);
20625 assign( kondT, mk_armg_calculate_condition(cond) );
20626 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
20627 Ijk_Boring,
20628 IRConst_U32(dst | 1/*CPSR.T*/),
20629 OFFB_R15T ));
20630 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
20631 | 1 /*CPSR.T*/ ));
20632 dres.jk_StopHere = Ijk_Boring;
20633 dres.whatNext = Dis_StopHere;
20634 DIP("b%s 0x%x\n", nCC(cond), dst);
20635 goto decode_success;
20637 break;
20640 default:
20641 break; /* hmm, nothing matched */
20645 /* ================ 16-bit misc cases ================ */
20647 switch (INSN0(15,0)) {
20648 case 0xBF00:
20649 /* ------ NOP ------ */
20650 DIP("nop\n");
20651 goto decode_success;
20652 case 0xBF10: // YIELD
20653 case 0xBF20: // WFE
20654 /* ------ WFE, YIELD ------ */
20655 /* Both appear to get used as a spin-loop hints. Do the usual thing,
20656 which is to continue after yielding. */
20657 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
20658 Ijk_Yield,
20659 IRConst_U32((guest_R15_curr_instr_notENC + 2)
20660 | 1 /*CPSR.T*/),
20661 OFFB_R15T ));
20662 Bool isWFE = INSN0(15,0) == 0xBF20;
20663 DIP(isWFE ? "wfe\n" : "yield\n");
20664 goto decode_success;
20665 case 0xBF40:
20666 /* ------ SEV ------ */
20667 /* Treat this as a no-op. Any matching WFEs won't really
20668 cause the host CPU to snooze; they just cause V to try to
20669 run some other thread for a while. So there's no point in
20670 really doing anything for SEV. */
20671 DIP("sev\n");
20672 goto decode_success;
20673 default:
20674 break; /* fall through */
20677 /* ----------------------------------------------------------- */
20678 /* -- -- */
20679 /* -- Thumb 32-bit integer instructions -- */
20680 /* -- -- */
20681 /* ----------------------------------------------------------- */
20683 # define INSN1(_bMax,_bMin) SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
20685 /* second 16 bits of the instruction, if any */
20686 vassert(insn1 == 0);
20687 insn1 = getUShortLittleEndianly( guest_instr+2 );
20689 anOp = Iop_INVALID; /* paranoia */
20690 anOpNm = NULL; /* paranoia */
20692 /* Change result defaults to suit 32-bit insns. */
20693 vassert(dres.whatNext == Dis_Continue);
20694 vassert(dres.len == 2);
20695 dres.len = 4;
20697 /* ---------------- BL/BLX simm26 ---------------- */
20698 if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
20699 UInt isBL = INSN1(12,12);
20700 UInt bS = INSN0(10,10);
20701 UInt bJ1 = INSN1(13,13);
20702 UInt bJ2 = INSN1(11,11);
20703 UInt bI1 = 1 ^ (bJ1 ^ bS);
20704 UInt bI2 = 1 ^ (bJ2 ^ bS);
20705 UInt uimm25
20706 = (bS << (1 + 1 + 10 + 11 + 1))
20707 | (bI1 << (1 + 10 + 11 + 1))
20708 | (bI2 << (10 + 11 + 1))
20709 | (INSN0(9,0) << (11 + 1))
20710 | (INSN1(10,0) << 1);
20711 uimm25 <<= 7;
20712 Int simm25 = (Int)uimm25;
20713 simm25 >>= 7;
20715 vassert(0 == (guest_R15_curr_instr_notENC & 1));
20716 UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
20718 /* One further validity case to check: in the case of BLX
20719 (not-BL), that insn1[0] must be zero. */
20720 Bool valid = True;
20721 if (isBL == 0 && INSN1(0,0) == 1) valid = False;
20722 if (valid) {
20723 /* Only allowed outside or last-in IT block; SIGILL if not so. */
20724 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20725 // and skip this insn if not selected; being cleverer is too
20726 // difficult
20727 mk_skip_over_T32_if_cond_is_false(condT);
20728 condT = IRTemp_INVALID;
20729 // now uncond
20731 /* We're returning to Thumb code, hence "| 1" */
20732 putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
20733 IRTemp_INVALID);
20734 if (isBL) {
20735 /* BL: unconditional T -> T call */
20736 /* we're calling Thumb code, hence "| 1" */
20737 llPutIReg(15, mkU32( dst | 1 ));
20738 DIP("bl 0x%x (stay in Thumb mode)\n", dst);
20739 } else {
20740 /* BLX: unconditional T -> A call */
20741 /* we're calling ARM code, hence "& 3" to align to a
20742 valid ARM insn address */
20743 llPutIReg(15, mkU32( dst & ~3 ));
20744 DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
20746 dres.whatNext = Dis_StopHere;
20747 dres.jk_StopHere = Ijk_Call;
20748 goto decode_success;
20752 /* ---------------- {LD,ST}M{IA,DB} ---------------- */
20753 if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
20754 || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
20755 UInt bW = INSN0(5,5); /* writeback Rn ? */
20756 UInt bL = INSN0(4,4);
20757 UInt rN = INSN0(3,0);
20758 UInt bP = INSN1(15,15); /* reglist entry for r15 */
20759 UInt bM = INSN1(14,14); /* reglist entry for r14 */
20760 UInt rLmost = INSN1(12,0); /* reglist entry for r0 .. 12 */
20761 UInt rL13 = INSN1(13,13); /* must be zero */
20762 UInt regList = 0;
20763 Bool valid = True;
20765 UInt bINC = 1;
20766 UInt bBEFORE = 0;
20767 if (INSN0(15,6) == 0x3a4) {
20768 bINC = 0;
20769 bBEFORE = 1;
20772 /* detect statically invalid cases, and construct the final
20773 reglist */
20774 if (rL13 == 1)
20775 valid = False;
20777 if (bL == 1) {
20778 regList = (bP << 15) | (bM << 14) | rLmost;
20779 if (rN == 15) valid = False;
20780 if (popcount32(regList) < 2) valid = False;
20781 if (bP == 1 && bM == 1) valid = False;
20782 if (bW == 1 && (regList & (1<<rN))) valid = False;
20783 } else {
20784 regList = (bM << 14) | rLmost;
20785 if (bP == 1) valid = False;
20786 if (rN == 15) valid = False;
20787 if (popcount32(regList) < 2) valid = False;
20788 if (bW == 1 && (regList & (1<<rN))) valid = False;
20791 if (valid) {
20792 if (bL == 1 && bP == 1) {
20793 // We'll be writing the PC. Hence:
20794 /* Only allowed outside or last-in IT block; SIGILL if not so. */
20795 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20798 /* Go uncond: */
20799 mk_skip_over_T32_if_cond_is_false(condT);
20800 condT = IRTemp_INVALID;
20801 // now uncond
20803 /* Generate the IR. This might generate a write to R15. */
20804 mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
20806 if (bL == 1 && (regList & (1<<15))) {
20807 // If we wrote to R15, we have an interworking return to
20808 // deal with.
20809 llPutIReg(15, llGetIReg(15));
20810 dres.jk_StopHere = Ijk_Ret;
20811 dres.whatNext = Dis_StopHere;
20814 DIP("%sm%c%c r%u%s, {0x%04x}\n",
20815 bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
20816 rN, bW ? "!" : "", regList);
20818 goto decode_success;
20822 /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
20823 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20824 && INSN0(9,5) == BITS5(0,1,0,0,0)
20825 && INSN1(15,15) == 0) {
20826 UInt bS = INSN0(4,4);
20827 UInt rN = INSN0(3,0);
20828 UInt rD = INSN1(11,8);
20829 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20830 /* but allow "add.w reg, sp, #constT" for reg != PC */
20831 if (!valid && rD <= 14 && rN == 13)
20832 valid = True;
20833 if (valid) {
20834 IRTemp argL = newTemp(Ity_I32);
20835 IRTemp argR = newTemp(Ity_I32);
20836 IRTemp res = newTemp(Ity_I32);
20837 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20838 assign(argL, getIRegT(rN));
20839 assign(argR, mkU32(imm32));
20840 assign(res, binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
20841 putIRegT(rD, mkexpr(res), condT);
20842 if (bS == 1)
20843 setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
20844 DIP("add%s.w r%u, r%u, #%u\n",
20845 bS == 1 ? "s" : "", rD, rN, imm32);
20846 goto decode_success;
20850 /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
20851 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20852 && INSN0(9,4) == BITS6(1,0,0,0,0,0)
20853 && INSN1(15,15) == 0) {
20854 UInt rN = INSN0(3,0);
20855 UInt rD = INSN1(11,8);
20856 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20857 /* but allow "addw reg, sp, #uimm12" for reg != PC */
20858 if (!valid && rD <= 14 && rN == 13)
20859 valid = True;
20860 if (valid) {
20861 IRTemp argL = newTemp(Ity_I32);
20862 IRTemp argR = newTemp(Ity_I32);
20863 IRTemp res = newTemp(Ity_I32);
20864 UInt imm12 = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
20865 assign(argL, getIRegT(rN));
20866 assign(argR, mkU32(imm12));
20867 assign(res, binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
20868 putIRegT(rD, mkexpr(res), condT);
20869 DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
20870 goto decode_success;
20874 /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
20875 /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
20876 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20877 && ( INSN0(9,4) == BITS6(0,1,1,0,1,1) // CMP
20878 || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
20879 && INSN1(15,15) == 0
20880 && INSN1(11,8) == BITS4(1,1,1,1)) {
20881 UInt rN = INSN0(3,0);
20882 if (rN != 15) {
20883 IRTemp argL = newTemp(Ity_I32);
20884 IRTemp argR = newTemp(Ity_I32);
20885 Bool isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
20886 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20887 assign(argL, getIRegT(rN));
20888 assign(argR, mkU32(imm32));
20889 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
20890 argL, argR, condT );
20891 DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
20892 goto decode_success;
20896 /* -------------- (T1) TST.W Rn, #constT -------------- */
20897 /* -------------- (T1) TEQ.W Rn, #constT -------------- */
20898 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20899 && ( INSN0(9,4) == BITS6(0,0,0,0,0,1) // TST
20900 || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
20901 && INSN1(15,15) == 0
20902 && INSN1(11,8) == BITS4(1,1,1,1)) {
20903 UInt rN = INSN0(3,0);
20904 if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
20905 Bool isTST = INSN0(9,4) == BITS6(0,0,0,0,0,1);
20906 IRTemp argL = newTemp(Ity_I32);
20907 IRTemp argR = newTemp(Ity_I32);
20908 IRTemp res = newTemp(Ity_I32);
20909 IRTemp oldV = newTemp(Ity_I32);
20910 IRTemp oldC = newTemp(Ity_I32);
20911 Bool updC = False;
20912 UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
20913 assign(argL, getIRegT(rN));
20914 assign(argR, mkU32(imm32));
20915 assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
20916 mkexpr(argL), mkexpr(argR)));
20917 assign( oldV, mk_armg_calculate_flag_v() );
20918 assign( oldC, updC
20919 ? mkU32((imm32 >> 31) & 1)
20920 : mk_armg_calculate_flag_c() );
20921 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
20922 DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
20923 goto decode_success;
20927 /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
20928 /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
20929 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20930 && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
20931 || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
20932 && INSN1(15,15) == 0) {
20933 Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
20934 UInt bS = INSN0(4,4);
20935 UInt rN = INSN0(3,0);
20936 UInt rD = INSN1(11,8);
20937 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20938 /* but allow "sub{s}.w reg, sp, #constT
20939 this is (T2) of "SUB (SP minus immediate)" */
20940 if (!valid && !isRSB && rN == 13 && rD != 15)
20941 valid = True;
20942 if (valid) {
20943 IRTemp argL = newTemp(Ity_I32);
20944 IRTemp argR = newTemp(Ity_I32);
20945 IRTemp res = newTemp(Ity_I32);
20946 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20947 assign(argL, getIRegT(rN));
20948 assign(argR, mkU32(imm32));
20949 assign(res, isRSB
20950 ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
20951 : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
20952 putIRegT(rD, mkexpr(res), condT);
20953 if (bS == 1) {
20954 if (isRSB)
20955 setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
20956 else
20957 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
20959 DIP("%s%s.w r%u, r%u, #%u\n",
20960 isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
20961 goto decode_success;
20965 /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
20966 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20967 && INSN0(9,4) == BITS6(1,0,1,0,1,0)
20968 && INSN1(15,15) == 0) {
20969 UInt rN = INSN0(3,0);
20970 UInt rD = INSN1(11,8);
20971 Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20972 /* but allow "subw sp, sp, #uimm12" */
20973 if (!valid && rD == 13 && rN == 13)
20974 valid = True;
20975 if (valid) {
20976 IRTemp argL = newTemp(Ity_I32);
20977 IRTemp argR = newTemp(Ity_I32);
20978 IRTemp res = newTemp(Ity_I32);
20979 UInt imm12 = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
20980 assign(argL, getIRegT(rN));
20981 assign(argR, mkU32(imm12));
20982 assign(res, binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
20983 putIRegT(rD, mkexpr(res), condT);
20984 DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
20985 goto decode_success;
20989 /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
20990 /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
20991 if (INSN0(15,11) == BITS5(1,1,1,1,0)
20992 && ( INSN0(9,5) == BITS5(0,1,0,1,0) // ADC
20993 || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
20994 && INSN1(15,15) == 0) {
20995 /* ADC: Rd = Rn + constT + oldC */
20996 /* SBC: Rd = Rn - constT - (oldC ^ 1) */
20997 UInt bS = INSN0(4,4);
20998 UInt rN = INSN0(3,0);
20999 UInt rD = INSN1(11,8);
21000 if (!isBadRegT(rN) && !isBadRegT(rD)) {
21001 IRTemp argL = newTemp(Ity_I32);
21002 IRTemp argR = newTemp(Ity_I32);
21003 IRTemp res = newTemp(Ity_I32);
21004 IRTemp oldC = newTemp(Ity_I32);
21005 UInt imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
21006 assign(argL, getIRegT(rN));
21007 assign(argR, mkU32(imm32));
21008 assign(oldC, mk_armg_calculate_flag_c() );
21009 const HChar* nm = "???";
21010 switch (INSN0(9,5)) {
21011 case BITS5(0,1,0,1,0): // ADC
21012 nm = "adc";
21013 assign(res,
21014 binop(Iop_Add32,
21015 binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
21016 mkexpr(oldC) ));
21017 putIRegT(rD, mkexpr(res), condT);
21018 if (bS)
21019 setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
21020 argL, argR, oldC, condT );
21021 break;
21022 case BITS5(0,1,0,1,1): // SBC
21023 nm = "sbc";
21024 assign(res,
21025 binop(Iop_Sub32,
21026 binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
21027 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
21028 putIRegT(rD, mkexpr(res), condT);
21029 if (bS)
21030 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
21031 argL, argR, oldC, condT );
21032 break;
21033 default:
21034 vassert(0);
21036 DIP("%s%s.w r%u, r%u, #%u\n",
21037 nm, bS == 1 ? "s" : "", rD, rN, imm32);
21038 goto decode_success;
21042 /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
21043 /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
21044 /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
21045 /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
21046 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21047 && ( INSN0(9,5) == BITS5(0,0,0,1,0) // ORR
21048 || INSN0(9,5) == BITS5(0,0,0,0,0) // AND
21049 || INSN0(9,5) == BITS5(0,0,0,0,1) // BIC
21050 || INSN0(9,5) == BITS5(0,0,1,0,0) // EOR
21051 || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
21052 && INSN1(15,15) == 0) {
21053 UInt bS = INSN0(4,4);
21054 UInt rN = INSN0(3,0);
21055 UInt rD = INSN1(11,8);
21056 if (!isBadRegT(rN) && !isBadRegT(rD)) {
21057 Bool notArgR = False;
21058 IROp op = Iop_INVALID;
21059 const HChar* nm = "???";
21060 switch (INSN0(9,5)) {
21061 case BITS5(0,0,0,1,0): op = Iop_Or32; nm = "orr"; break;
21062 case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
21063 case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
21064 notArgR = True; break;
21065 case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
21066 case BITS5(0,0,0,1,1): op = Iop_Or32; nm = "orn";
21067 notArgR = True; break;
21068 default: vassert(0);
21070 IRTemp argL = newTemp(Ity_I32);
21071 IRTemp argR = newTemp(Ity_I32);
21072 IRTemp res = newTemp(Ity_I32);
21073 Bool updC = False;
21074 UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
21075 assign(argL, getIRegT(rN));
21076 assign(argR, mkU32(notArgR ? ~imm32 : imm32));
21077 assign(res, binop(op, mkexpr(argL), mkexpr(argR)));
21078 putIRegT(rD, mkexpr(res), condT);
21079 if (bS) {
21080 IRTemp oldV = newTemp(Ity_I32);
21081 IRTemp oldC = newTemp(Ity_I32);
21082 assign( oldV, mk_armg_calculate_flag_v() );
21083 assign( oldC, updC
21084 ? mkU32((imm32 >> 31) & 1)
21085 : mk_armg_calculate_flag_c() );
21086 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21087 condT );
21089 DIP("%s%s.w r%u, r%u, #%u\n",
21090 nm, bS == 1 ? "s" : "", rD, rN, imm32);
21091 goto decode_success;
21095 /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
21096 /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
21097 /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
21098 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21099 && ( INSN0(8,5) == BITS4(1,0,0,0) // add subopc
21100 || INSN0(8,5) == BITS4(1,1,0,1) // sub subopc
21101 || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
21102 && INSN1(15,15) == 0) {
21103 UInt rN = INSN0(3,0);
21104 UInt rD = INSN1(11,8);
21105 UInt rM = INSN1(3,0);
21106 UInt bS = INSN0(4,4);
21107 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21108 UInt how = INSN1(5,4);
21110 Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
21111       /* but allow "add.w reg, sp, reg, lsl #N" for N=0..31;
21112          (T3) "ADD (SP plus register)" */
21113 if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
21114 && rD != 15 && rN == 13 && imm5 <= 31 && how == 0) {
21115 valid = True;
21117       /* also allow "sub.w reg, sp, reg, lsl #N" for N=0 .. 5;
21118          (T1) "SUB (SP minus register)" */
21119 if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
21120 && rD != 15 && rN == 13 && imm5 <= 5 && how == 0) {
21121 valid = True;
21123 if (valid) {
21124 Bool swap = False;
21125 IROp op = Iop_INVALID;
21126 const HChar* nm = "???";
21127 switch (INSN0(8,5)) {
21128 case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
21129 case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
21130 case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
21131 swap = True; break;
21132 default: vassert(0);
21135 IRTemp argL = newTemp(Ity_I32);
21136 assign(argL, getIRegT(rN));
21138 IRTemp rMt = newTemp(Ity_I32);
21139 assign(rMt, getIRegT(rM));
21141 IRTemp argR = newTemp(Ity_I32);
21142 compute_result_and_C_after_shift_by_imm5(
21143 dis_buf, &argR, NULL, rMt, how, imm5, rM
21146 IRTemp res = newTemp(Ity_I32);
21147 assign(res, swap
21148 ? binop(op, mkexpr(argR), mkexpr(argL))
21149 : binop(op, mkexpr(argL), mkexpr(argR)));
21151 putIRegT(rD, mkexpr(res), condT);
21152 if (bS) {
21153 switch (op) {
21154 case Iop_Add32:
21155 setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
21156 break;
21157 case Iop_Sub32:
21158 if (swap)
21159 setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
21160 else
21161 setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
21162 break;
21163 default:
21164 vassert(0);
21168 DIP("%s%s.w r%u, r%u, %s\n",
21169 nm, bS ? "s" : "", rD, rN, dis_buf);
21170 goto decode_success;
21174 /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
21175 /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
21176 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21177 && ( INSN0(8,5) == BITS4(1,0,1,0) // adc subopc
21178 || INSN0(8,5) == BITS4(1,0,1,1)) // sbc subopc
21179 && INSN1(15,15) == 0) {
21180 /* ADC: Rd = Rn + shifter_operand + oldC */
21181 /* SBC: Rd = Rn - shifter_operand - (oldC ^ 1) */
21182 UInt rN = INSN0(3,0);
21183 UInt rD = INSN1(11,8);
21184 UInt rM = INSN1(3,0);
21185 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21186 UInt bS = INSN0(4,4);
21187 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21188 UInt how = INSN1(5,4);
21190 IRTemp argL = newTemp(Ity_I32);
21191 assign(argL, getIRegT(rN));
21193 IRTemp rMt = newTemp(Ity_I32);
21194 assign(rMt, getIRegT(rM));
21196 IRTemp oldC = newTemp(Ity_I32);
21197 assign(oldC, mk_armg_calculate_flag_c());
21199 IRTemp argR = newTemp(Ity_I32);
21200 compute_result_and_C_after_shift_by_imm5(
21201 dis_buf, &argR, NULL, rMt, how, imm5, rM
21204 const HChar* nm = "???";
21205 IRTemp res = newTemp(Ity_I32);
21206 switch (INSN0(8,5)) {
21207 case BITS4(1,0,1,0): // ADC
21208 nm = "adc";
21209 assign(res,
21210 binop(Iop_Add32,
21211 binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
21212 mkexpr(oldC) ));
21213 putIRegT(rD, mkexpr(res), condT);
21214 if (bS)
21215 setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
21216 argL, argR, oldC, condT );
21217 break;
21218 case BITS4(1,0,1,1): // SBC
21219 nm = "sbc";
21220 assign(res,
21221 binop(Iop_Sub32,
21222 binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
21223 binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
21224 putIRegT(rD, mkexpr(res), condT);
21225 if (bS)
21226 setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
21227 argL, argR, oldC, condT );
21228 break;
21229 default:
21230 vassert(0);
21233 DIP("%s%s.w r%u, r%u, %s\n",
21234 nm, bS ? "s" : "", rD, rN, dis_buf);
21235 goto decode_success;
21239 /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
21240 /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
21241 /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
21242 /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
21243 /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
21244 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21245 && ( INSN0(8,5) == BITS4(0,0,0,0) // and subopc
21246 || INSN0(8,5) == BITS4(0,0,1,0) // orr subopc
21247 || INSN0(8,5) == BITS4(0,1,0,0) // eor subopc
21248 || INSN0(8,5) == BITS4(0,0,0,1) // bic subopc
21249 || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
21250 && INSN1(15,15) == 0) {
21251 UInt rN = INSN0(3,0);
21252 UInt rD = INSN1(11,8);
21253 UInt rM = INSN1(3,0);
21254 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21255 Bool notArgR = False;
21256 IROp op = Iop_INVALID;
21257 const HChar* nm = "???";
21258 switch (INSN0(8,5)) {
21259 case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
21260 case BITS4(0,0,1,0): op = Iop_Or32; nm = "orr"; break;
21261 case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
21262 case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
21263 notArgR = True; break;
21264 case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
21265 notArgR = True; break;
21266 default: vassert(0);
21268 UInt bS = INSN0(4,4);
21269 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21270 UInt how = INSN1(5,4);
21272 IRTemp rNt = newTemp(Ity_I32);
21273 assign(rNt, getIRegT(rN));
21275 IRTemp rMt = newTemp(Ity_I32);
21276 assign(rMt, getIRegT(rM));
21278 IRTemp argR = newTemp(Ity_I32);
21279 IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21281 compute_result_and_C_after_shift_by_imm5(
21282 dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
21285 IRTemp res = newTemp(Ity_I32);
21286 if (notArgR) {
21287 vassert(op == Iop_And32 || op == Iop_Or32);
21288 assign(res, binop(op, mkexpr(rNt),
21289 unop(Iop_Not32, mkexpr(argR))));
21290 } else {
21291 assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
21294 putIRegT(rD, mkexpr(res), condT);
21295 if (bS) {
21296 IRTemp oldV = newTemp(Ity_I32);
21297 assign( oldV, mk_armg_calculate_flag_v() );
21298 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21299 condT );
21302 DIP("%s%s.w r%u, r%u, %s\n",
21303 nm, bS ? "s" : "", rD, rN, dis_buf);
21304 goto decode_success;
21308 /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
21309 /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
21310 /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
21311 /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
21312 if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
21313 && INSN1(15,12) == BITS4(1,1,1,1)
21314 && INSN1(7,4) == BITS4(0,0,0,0)) {
21315 UInt how = INSN0(6,5); // standard encoding
21316 UInt rN = INSN0(3,0);
21317 UInt rD = INSN1(11,8);
21318 UInt rM = INSN1(3,0);
21319 UInt bS = INSN0(4,4);
21320 Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
21321 if (valid) {
21322 IRTemp rNt = newTemp(Ity_I32);
21323 IRTemp rMt = newTemp(Ity_I32);
21324 IRTemp res = newTemp(Ity_I32);
21325 IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21326 IRTemp oldV = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21327 const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
21328 const HChar* nm = nms[how];
21329 assign(rNt, getIRegT(rN));
21330 assign(rMt, getIRegT(rM));
21331 compute_result_and_C_after_shift_by_reg(
21332 dis_buf, &res, bS ? &oldC : NULL,
21333 rNt, how, rMt, rN, rM
21335 if (bS)
21336 assign(oldV, mk_armg_calculate_flag_v());
21337 putIRegT(rD, mkexpr(res), condT);
21338 if (bS) {
21339 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21340 condT );
21342 DIP("%s%s.w r%u, r%u, r%u\n",
21343 nm, bS ? "s" : "", rD, rN, rM);
21344 goto decode_success;
21348 /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
21349 /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
21350 if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
21351 && INSN1(15,15) == 0) {
21352 UInt rD = INSN1(11,8);
21353 UInt rN = INSN1(3,0);
21354 UInt bS = INSN0(4,4);
21355 UInt isMVN = INSN0(5,5);
21356 Bool regsOK = (bS || isMVN)
21357 ? (!isBadRegT(rD) && !isBadRegT(rN))
21358 : (rD != 15 && rN != 15 && (rD != 13 || rN != 13));
21359 if (regsOK) {
21360 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21361 UInt how = INSN1(5,4);
21363 IRTemp rNt = newTemp(Ity_I32);
21364 assign(rNt, getIRegT(rN));
21366 IRTemp oldRn = newTemp(Ity_I32);
21367 IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21368 compute_result_and_C_after_shift_by_imm5(
21369 dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
21372 IRTemp res = newTemp(Ity_I32);
21373 assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
21374 : mkexpr(oldRn));
21376 putIRegT(rD, mkexpr(res), condT);
21377 if (bS) {
21378 IRTemp oldV = newTemp(Ity_I32);
21379 assign( oldV, mk_armg_calculate_flag_v() );
21380 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
21382 DIP("%s%s.w r%u, %s\n",
21383 isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
21384 goto decode_success;
21388 /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
21389 /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
21390 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21391 && ( INSN0(8,4) == BITS5(0,0,0,0,1) // TST
21392 || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
21393 && INSN1(15,15) == 0
21394 && INSN1(11,8) == BITS4(1,1,1,1)) {
21395 UInt rN = INSN0(3,0);
21396 UInt rM = INSN1(3,0);
21397 if (!isBadRegT(rN) && !isBadRegT(rM)) {
21398 Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
21400 UInt how = INSN1(5,4);
21401 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21403 IRTemp argL = newTemp(Ity_I32);
21404 assign(argL, getIRegT(rN));
21406 IRTemp rMt = newTemp(Ity_I32);
21407 assign(rMt, getIRegT(rM));
21409 IRTemp argR = newTemp(Ity_I32);
21410 IRTemp oldC = newTemp(Ity_I32);
21411 compute_result_and_C_after_shift_by_imm5(
21412 dis_buf, &argR, &oldC, rMt, how, imm5, rM
21415 IRTemp oldV = newTemp(Ity_I32);
21416 assign( oldV, mk_armg_calculate_flag_v() );
21418 IRTemp res = newTemp(Ity_I32);
21419 assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
21420 mkexpr(argL), mkexpr(argR)));
21422 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21423 condT );
21424 DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
21425 goto decode_success;
21429 /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
21430 /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
21431 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21432 && ( INSN0(8,4) == BITS5(1,1,0,1,1) // CMP
21433 || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
21434 && INSN1(15,15) == 0
21435 && INSN1(11,8) == BITS4(1,1,1,1)) {
21436 UInt rN = INSN0(3,0);
21437 UInt rM = INSN1(3,0);
21438 if (!isBadRegT(rN) && !isBadRegT(rM)) {
21439 Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
21440 UInt how = INSN1(5,4);
21441 UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21443 IRTemp argL = newTemp(Ity_I32);
21444 assign(argL, getIRegT(rN));
21446 IRTemp rMt = newTemp(Ity_I32);
21447 assign(rMt, getIRegT(rM));
21449 IRTemp argR = newTemp(Ity_I32);
21450 compute_result_and_C_after_shift_by_imm5(
21451 dis_buf, &argR, NULL, rMt, how, imm5, rM
21454 setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
21455 argL, argR, condT );
21457 DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
21458 goto decode_success;
21462 /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
21463 /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
21464 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21465 && ( INSN0(9,5) == BITS5(0,0,0,1,0) // MOV
21466 || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
21467 && INSN0(3,0) == BITS4(1,1,1,1)
21468 && INSN1(15,15) == 0) {
21469 UInt rD = INSN1(11,8);
21470 if (!isBadRegT(rD)) {
21471 Bool updC = False;
21472 UInt bS = INSN0(4,4);
21473 Bool isMVN = INSN0(5,5) == 1;
21474 UInt imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
21475 IRTemp res = newTemp(Ity_I32);
21476 assign(res, mkU32(isMVN ? ~imm32 : imm32));
21477 putIRegT(rD, mkexpr(res), condT);
21478 if (bS) {
21479 IRTemp oldV = newTemp(Ity_I32);
21480 IRTemp oldC = newTemp(Ity_I32);
21481 assign( oldV, mk_armg_calculate_flag_v() );
21482 assign( oldC, updC
21483 ? mkU32((imm32 >> 31) & 1)
21484 : mk_armg_calculate_flag_c() );
21485 setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21486 condT );
21488 DIP("%s%s.w r%u, #%u\n",
21489 isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
21490 goto decode_success;
21494 /* -------------- (T3) MOVW Rd, #imm16 -------------- */
21495 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21496 && INSN0(9,4) == BITS6(1,0,0,1,0,0)
21497 && INSN1(15,15) == 0) {
21498 UInt rD = INSN1(11,8);
21499 if (!isBadRegT(rD)) {
21500 UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
21501 | (INSN1(14,12) << 8) | INSN1(7,0);
21502 putIRegT(rD, mkU32(imm16), condT);
21503 DIP("movw r%u, #%u\n", rD, imm16);
21504 goto decode_success;
21508 /* ---------------- MOVT Rd, #imm16 ---------------- */
21509 if (INSN0(15,11) == BITS5(1,1,1,1,0)
21510 && INSN0(9,4) == BITS6(1,0,1,1,0,0)
21511 && INSN1(15,15) == 0) {
21512 UInt rD = INSN1(11,8);
21513 if (!isBadRegT(rD)) {
21514 UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
21515 | (INSN1(14,12) << 8) | INSN1(7,0);
21516 IRTemp res = newTemp(Ity_I32);
21517 assign(res,
21518 binop(Iop_Or32,
21519 binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
21520 mkU32(imm16 << 16)));
21521 putIRegT(rD, mkexpr(res), condT);
21522 DIP("movt r%u, #%u\n", rD, imm16);
21523 goto decode_success;
21527 /* ---------------- LD/ST reg+/-#imm8 ---------------- */
21528 /* Loads and stores of the form:
21529 op Rt, [Rn, #-imm8] or
21530 op Rt, [Rn], #+/-imm8 or
21531 op Rt, [Rn, #+/-imm8]!
21532 where op is one of
21533 ldrb ldrh ldr ldrsb ldrsh
21534 strb strh str
21536 if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
21537 Bool valid = True;
21538 Bool syned = False;
21539 Bool isST = False;
21540 IRType ty = Ity_I8;
21541 const HChar* nm = "???";
21543 switch (INSN0(8,4)) {
21544 case BITS5(0,0,0,0,0): // strb
21545 nm = "strb"; isST = True; break;
21546 case BITS5(0,0,0,0,1): // ldrb
21547 nm = "ldrb"; break;
21548 case BITS5(1,0,0,0,1): // ldrsb
21549 nm = "ldrsb"; syned = True; break;
21550 case BITS5(0,0,0,1,0): // strh
21551 nm = "strh"; ty = Ity_I16; isST = True; break;
21552 case BITS5(0,0,0,1,1): // ldrh
21553 nm = "ldrh"; ty = Ity_I16; break;
21554 case BITS5(1,0,0,1,1): // ldrsh
21555 nm = "ldrsh"; ty = Ity_I16; syned = True; break;
21556 case BITS5(0,0,1,0,0): // str
21557 nm = "str"; ty = Ity_I32; isST = True; break;
21558 case BITS5(0,0,1,0,1):
21559 nm = "ldr"; ty = Ity_I32; break; // ldr
21560 default:
21561 valid = False; break;
21564 UInt rN = INSN0(3,0);
21565 UInt rT = INSN1(15,12);
21566 UInt bP = INSN1(10,10);
21567 UInt bU = INSN1(9,9);
21568 UInt bW = INSN1(8,8);
21569 UInt imm8 = INSN1(7,0);
21570 Bool loadsPC = False;
21572 if (valid) {
21573 if (bP == 1 && bU == 1 && bW == 0)
21574 valid = False;
21575 if (bP == 0 && bW == 0)
21576 valid = False;
21577 if (rN == 15)
21578 valid = False;
21579 if (bW == 1 && rN == rT)
21580 valid = False;
21581 if (ty == Ity_I8 || ty == Ity_I16) {
21582 if (isBadRegT(rT))
21583 valid = False;
21584 } else {
21585 /* ty == Ity_I32 */
21586 if (isST && rT == 15)
21587 valid = False;
21588 if (!isST && rT == 15)
21589 loadsPC = True;
21593 if (valid) {
21594 // if it's a branch, it can't happen in the middle of an IT block
21595 // Also, if it is a branch, make it unconditional at this point.
21596 // Doing conditional branches in-line is too complex (for now)
21597 if (loadsPC) {
21598 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21599 // go uncond
21600 mk_skip_over_T32_if_cond_is_false(condT);
21601 condT = IRTemp_INVALID;
21602 // now uncond
21605 IRTemp preAddr = newTemp(Ity_I32);
21606 assign(preAddr, getIRegT(rN));
21608 IRTemp postAddr = newTemp(Ity_I32);
21609 assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
21610 mkexpr(preAddr), mkU32(imm8)));
21612 IRTemp transAddr = bP == 1 ? postAddr : preAddr;
21614 if (isST) {
21616 /* Store. If necessary, update the base register before
21617 the store itself, so that the common idiom of "str rX,
21618 [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
21619 a.k.a "push rX") doesn't cause Memcheck to complain
21620 that the access is below the stack pointer. Also, not
21621 updating sp before the store confuses Valgrind's
21622 dynamic stack-extending logic. So do it before the
21623 store. Hence we need to snarf the store data before
21624 doing the basereg update. */
21626 /* get hold of the data to be stored */
21627 IRTemp oldRt = newTemp(Ity_I32);
21628 assign(oldRt, getIRegT(rT));
21630 /* Update Rn if necessary. */
21631 if (bW == 1) {
21632 vassert(rN != rT); // assured by validity check above
21633 putIRegT(rN, mkexpr(postAddr), condT);
21636 /* generate the transfer */
21637 IRExpr* data = NULL;
21638 switch (ty) {
21639 case Ity_I8:
21640 data = unop(Iop_32to8, mkexpr(oldRt));
21641 break;
21642 case Ity_I16:
21643 data = unop(Iop_32to16, mkexpr(oldRt));
21644 break;
21645 case Ity_I32:
21646 data = mkexpr(oldRt);
21647 break;
21648 default:
21649 vassert(0);
21651 storeGuardedLE(mkexpr(transAddr), data, condT);
21653 } else {
21655 /* Load. */
21656 IRTemp llOldRt = newTemp(Ity_I32);
21657 assign(llOldRt, llGetIReg(rT));
21659 /* generate the transfer */
21660 IRTemp newRt = newTemp(Ity_I32);
21661 IRLoadGOp widen = ILGop_INVALID;
21662 switch (ty) {
21663 case Ity_I8:
21664 widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21665 case Ity_I16:
21666 widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21667 case Ity_I32:
21668 widen = ILGop_Ident32; break;
21669 default:
21670 vassert(0);
21672 loadGuardedLE(newRt, widen,
21673 mkexpr(transAddr), mkexpr(llOldRt), condT);
21674 if (rT == 15) {
21675 vassert(loadsPC);
21676 /* We'll do the write to the PC just below */
21677 } else {
21678 vassert(!loadsPC);
21679 /* IRTemp_INVALID is OK here because in the case where
21680 condT is false at run time, we're just putting the
21681 old rT value back. */
21682 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21685 /* Update Rn if necessary. */
21686 if (bW == 1) {
21687 vassert(rN != rT); // assured by validity check above
21688 putIRegT(rN, mkexpr(postAddr), condT);
21691 if (loadsPC) {
21692 /* Presumably this is an interworking branch. */
21693 vassert(rN != 15); // assured by validity check above
21694 vassert(rT == 15);
21695 vassert(condT == IRTemp_INVALID); /* due to check above */
21696 llPutIReg(15, mkexpr(newRt));
21697 dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */
21698 dres.whatNext = Dis_StopHere;
21702 if (bP == 1 && bW == 0) {
21703 DIP("%s.w r%u, [r%u, #%c%u]\n",
21704 nm, rT, rN, bU ? '+' : '-', imm8);
21706 else if (bP == 1 && bW == 1) {
21707 DIP("%s.w r%u, [r%u, #%c%u]!\n",
21708 nm, rT, rN, bU ? '+' : '-', imm8);
21710 else {
21711 vassert(bP == 0 && bW == 1);
21712 DIP("%s.w r%u, [r%u], #%c%u\n",
21713 nm, rT, rN, bU ? '+' : '-', imm8);
21716 goto decode_success;
21720 /* ------------- LD/ST reg+(reg<<imm2) ------------- */
21721 /* Loads and stores of the form:
21722 op Rt, [Rn, Rm, LSL #imm8]
21723 where op is one of
21724 ldrb ldrh ldr ldrsb ldrsh
21725 strb strh str
21727 if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
21728 && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
21729 Bool valid = True;
21730 Bool syned = False;
21731 Bool isST = False;
21732 IRType ty = Ity_I8;
21733 const HChar* nm = "???";
21735 switch (INSN0(8,4)) {
21736 case BITS5(0,0,0,0,0): // strb
21737 nm = "strb"; isST = True; break;
21738 case BITS5(0,0,0,0,1): // ldrb
21739 nm = "ldrb"; break;
21740 case BITS5(1,0,0,0,1): // ldrsb
21741 nm = "ldrsb"; syned = True; break;
21742 case BITS5(0,0,0,1,0): // strh
21743 nm = "strh"; ty = Ity_I16; isST = True; break;
21744 case BITS5(0,0,0,1,1): // ldrh
21745 nm = "ldrh"; ty = Ity_I16; break;
21746 case BITS5(1,0,0,1,1): // ldrsh
21747 nm = "ldrsh"; ty = Ity_I16; syned = True; break;
21748 case BITS5(0,0,1,0,0): // str
21749 nm = "str"; ty = Ity_I32; isST = True; break;
21750 case BITS5(0,0,1,0,1):
21751 nm = "ldr"; ty = Ity_I32; break; // ldr
21752 default:
21753 valid = False; break;
21756 UInt rN = INSN0(3,0);
21757 UInt rM = INSN1(3,0);
21758 UInt rT = INSN1(15,12);
21759 UInt imm2 = INSN1(5,4);
21760 Bool loadsPC = False;
21762 if (ty == Ity_I8 || ty == Ity_I16) {
21763 /* all 8- and 16-bit load and store cases have the
21764 same exclusion set. */
21765 if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
21766 valid = False;
21767 } else {
21768 vassert(ty == Ity_I32);
21769 if (rN == 15 || isBadRegT(rM))
21770 valid = False;
21771 if (isST && rT == 15)
21772 valid = False;
21773 /* If it is a load and rT is 15, that's only allowable if we
21774 not in an IT block, or are the last in it. Need to insert
21775 a dynamic check for that. */
21776 if (!isST && rT == 15)
21777 loadsPC = True;
21780 if (valid) {
21781 // if it's a branch, it can't happen in the middle of an IT block
21782 // Also, if it is a branch, make it unconditional at this point.
21783 // Doing conditional branches in-line is too complex (for now)
21784 if (loadsPC) {
21785 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21786 // go uncond
21787 mk_skip_over_T32_if_cond_is_false(condT);
21788 condT = IRTemp_INVALID;
21789 // now uncond
21792 IRTemp transAddr = newTemp(Ity_I32);
21793 assign(transAddr,
21794 binop( Iop_Add32,
21795 getIRegT(rN),
21796 binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
21798 if (isST) {
21800 /* get hold of the data to be stored */
21801 IRTemp oldRt = newTemp(Ity_I32);
21802 assign(oldRt, getIRegT(rT));
21804 /* generate the transfer */
21805 IRExpr* data = NULL;
21806 switch (ty) {
21807 case Ity_I8:
21808 data = unop(Iop_32to8, mkexpr(oldRt));
21809 break;
21810 case Ity_I16:
21811 data = unop(Iop_32to16, mkexpr(oldRt));
21812 break;
21813 case Ity_I32:
21814 data = mkexpr(oldRt);
21815 break;
21816 default:
21817 vassert(0);
21819 storeGuardedLE(mkexpr(transAddr), data, condT);
21821 } else {
21823 /* Load. */
21824 IRTemp llOldRt = newTemp(Ity_I32);
21825 assign(llOldRt, llGetIReg(rT));
21827 /* generate the transfer */
21828 IRTemp newRt = newTemp(Ity_I32);
21829 IRLoadGOp widen = ILGop_INVALID;
21830 switch (ty) {
21831 case Ity_I8:
21832 widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21833 case Ity_I16:
21834 widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21835 case Ity_I32:
21836 widen = ILGop_Ident32; break;
21837 default:
21838 vassert(0);
21840 loadGuardedLE(newRt, widen,
21841 mkexpr(transAddr), mkexpr(llOldRt), condT);
21843 if (rT == 15) {
21844 vassert(loadsPC);
21845 /* We'll do the write to the PC just below */
21846 } else {
21847 vassert(!loadsPC);
21848 /* IRTemp_INVALID is OK here because in the case where
21849 condT is false at run time, we're just putting the
21850 old rT value back. */
21851 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21854 if (loadsPC) {
21855 /* Presumably this is an interworking branch. */
21856 vassert(rN != 15); // assured by validity check above
21857 vassert(rT == 15);
21858 vassert(condT == IRTemp_INVALID); /* due to check above */
21859 llPutIReg(15, mkexpr(newRt));
21860 dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */
21861 dres.whatNext = Dis_StopHere;
21865 DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
21866 nm, rT, rN, rM, imm2);
21868 goto decode_success;
21872 /* --------------- LD/ST reg+imm12 --------------- */
21873 /* Loads and stores of the form:
21874 op Rt, [Rn, #+-imm12]
21875 where op is one of
21876 ldrb ldrh ldr ldrsb ldrsh
21877 strb strh str
21879 if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
21880 Bool valid = True;
21881 Bool syned = INSN0(8,8) == 1;
21882 Bool isST = False;
21883 IRType ty = Ity_I8;
21884 UInt bU = INSN0(7,7); // 1: +imm 0: -imm
21885 // -imm is only supported by literal versions
21886 const HChar* nm = "???";
21888 switch (INSN0(6,4)) {
21889 case BITS3(0,0,0): // strb
21890 nm = "strb"; isST = True; break;
21891 case BITS3(0,0,1): // ldrb
21892 nm = syned ? "ldrsb" : "ldrb"; break;
21893 case BITS3(0,1,0): // strh
21894 nm = "strh"; ty = Ity_I16; isST = True; break;
21895 case BITS3(0,1,1): // ldrh
21896 nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
21897 case BITS3(1,0,0): // str
21898 nm = "str"; ty = Ity_I32; isST = True; break;
21899 case BITS3(1,0,1):
21900 nm = "ldr"; ty = Ity_I32; break; // ldr
21901 default:
21902 valid = False; break;
21905 UInt rN = INSN0(3,0);
21906 UInt rT = INSN1(15,12);
21907 UInt imm12 = INSN1(11,0);
21908 Bool loadsPC = False;
21910 if (rN != 15 && bU == 0) {
21911 // only pc supports #-imm12
21912 valid = False;
21915 if (isST) {
21916 if (syned) valid = False;
21917 if (rN == 15 || rT == 15)
21918 valid = False;
21919 } else {
21920 /* For a 32-bit load, rT == 15 is only allowable if we are not
21921 in an IT block, or are the last in it. Need to insert
21922 a dynamic check for that. Also, in this particular
21923 case, rN == 15 is allowable. In this case however, the
21924 value obtained for rN is (apparently)
21925 "word-align(address of current insn + 4)". */
21926 if (rT == 15) {
21927 if (ty == Ity_I32)
21928 loadsPC = True;
21929 else // Can't do it for B/H loads
21930 valid = False;
21934 if (valid) {
21935 // if it's a branch, it can't happen in the middle of an IT block
21936 // Also, if it is a branch, make it unconditional at this point.
21937 // Doing conditional branches in-line is too complex (for now)
21938 if (loadsPC) {
21939 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21940 // go uncond
21941 mk_skip_over_T32_if_cond_is_false(condT);
21942 condT = IRTemp_INVALID;
21943 // now uncond
21946 IRTemp rNt = newTemp(Ity_I32);
21947 if (rN == 15) {
21948 vassert(!isST);
21949 assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3)));
21950 } else {
21951 assign(rNt, getIRegT(rN));
21954 IRTemp transAddr = newTemp(Ity_I32);
21955 assign(transAddr,
21956 binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
21957 mkexpr(rNt), mkU32(imm12)));
21959 IRTemp oldRt = newTemp(Ity_I32);
21960 assign(oldRt, getIRegT(rT));
21962 IRTemp llOldRt = newTemp(Ity_I32);
21963 assign(llOldRt, llGetIReg(rT));
21965 if (isST) {
21966 IRExpr* data = NULL;
21967 switch (ty) {
21968 case Ity_I8:
21969 data = unop(Iop_32to8, mkexpr(oldRt));
21970 break;
21971 case Ity_I16:
21972 data = unop(Iop_32to16, mkexpr(oldRt));
21973 break;
21974 case Ity_I32:
21975 data = mkexpr(oldRt);
21976 break;
21977 default:
21978 vassert(0);
21980 storeGuardedLE(mkexpr(transAddr), data, condT);
21981 } else {
21982 IRTemp newRt = newTemp(Ity_I32);
21983 IRLoadGOp widen = ILGop_INVALID;
21984 switch (ty) {
21985 case Ity_I8:
21986 widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21987 case Ity_I16:
21988 widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21989 case Ity_I32:
21990 widen = ILGop_Ident32; break;
21991 default:
21992 vassert(0);
21994 loadGuardedLE(newRt, widen,
21995 mkexpr(transAddr), mkexpr(llOldRt), condT);
21996 if (rT == 15) {
21997 vassert(loadsPC);
21998 /* We'll do the write to the PC just below */
21999 } else {
22000 vassert(!loadsPC);
22001 /* IRTemp_INVALID is OK here because in the case where
22002 condT is false at run time, we're just putting the
22003 old rT value back. */
22004 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
22007 if (loadsPC) {
22008 /* Presumably this is an interworking branch. */
22009 vassert(rT == 15);
22010 vassert(condT == IRTemp_INVALID); /* due to check above */
22011 llPutIReg(15, mkexpr(newRt));
22012 dres.jk_StopHere = Ijk_Boring;
22013 dres.whatNext = Dis_StopHere;
22017 DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
22019 goto decode_success;
22023 /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
22024 /* Doubleword loads and stores of the form:
22025 ldrd/strd Rt, Rt2, [Rn, #+/-imm8] or
22026 ldrd/strd Rt, Rt2, [Rn], #+/-imm8 or
22027 ldrd/strd Rt, Rt2, [Rn, #+/-imm8]!
22029 if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
22030 UInt bP = INSN0(8,8);
22031 UInt bU = INSN0(7,7);
22032 UInt bW = INSN0(5,5);
22033 UInt bL = INSN0(4,4); // 1: load 0: store
22034 UInt rN = INSN0(3,0);
22035 UInt rT = INSN1(15,12);
22036 UInt rT2 = INSN1(11,8);
22037 UInt imm8 = INSN1(7,0);
22039 Bool valid = True;
22040 if (bP == 0 && bW == 0) valid = False;
22041 if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
22042 if (isBadRegT(rT) || isBadRegT(rT2)) valid = False;
22043 if (bL == 1 && rT == rT2) valid = False;
22044 /* It's OK to use PC as the base register only in the
22045 following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
22046 if (rN == 15 && (bL == 0/*store*/
22047 || bW == 1/*wb*/)) valid = False;
22049 if (valid) {
22050 IRTemp preAddr = newTemp(Ity_I32);
22051 assign(preAddr, 15 == rN
22052 ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
22053 : getIRegT(rN));
22055 IRTemp postAddr = newTemp(Ity_I32);
22056 assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
22057 mkexpr(preAddr), mkU32(imm8 << 2)));
22059 IRTemp transAddr = bP == 1 ? postAddr : preAddr;
22061 /* For almost all cases, we do the writeback after the transfers.
22062 However, that leaves the stack "uncovered" in cases like:
22063 strd rD, [sp, #-8]
22064 strd rD, [sp, #-16]
22065 In which case, do the writeback to SP now, instead of later.
22066 This is bad in that it makes the insn non-restartable if the
22067 accesses fault, but at least keeps Memcheck happy. */
22068 Bool writeback_already_done = False;
22069 if (bL == 0/*store*/ && bW == 1/*wb*/
22070 && rN == 13 && rN != rT && rN != rT2
22071 && bU == 0/*minus*/
22072 && ((imm8 << 2) == 8 || (imm8 << 2) == 16)) {
22073 putIRegT(rN, mkexpr(postAddr), condT);
22074 writeback_already_done = True;
22077 if (bL == 0) {
22078 IRTemp oldRt = newTemp(Ity_I32);
22079 IRTemp oldRt2 = newTemp(Ity_I32);
22080 assign(oldRt, getIRegT(rT));
22081 assign(oldRt2, getIRegT(rT2));
22082 storeGuardedLE( mkexpr(transAddr),
22083 mkexpr(oldRt), condT );
22084 storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
22085 mkexpr(oldRt2), condT );
22086 } else {
22087 IRTemp oldRt = newTemp(Ity_I32);
22088 IRTemp oldRt2 = newTemp(Ity_I32);
22089 IRTemp newRt = newTemp(Ity_I32);
22090 IRTemp newRt2 = newTemp(Ity_I32);
22091 assign(oldRt, llGetIReg(rT));
22092 assign(oldRt2, llGetIReg(rT2));
22093 loadGuardedLE( newRt, ILGop_Ident32,
22094 mkexpr(transAddr),
22095 mkexpr(oldRt), condT );
22096 loadGuardedLE( newRt2, ILGop_Ident32,
22097 binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
22098 mkexpr(oldRt2), condT );
22099 /* Put unconditionally, since we already switched on the condT
22100 in the guarded loads. */
22101 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
22102 putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
22105 if (bW == 1 && !writeback_already_done) {
22106 putIRegT(rN, mkexpr(postAddr), condT);
22109 const HChar* nm = bL ? "ldrd" : "strd";
22111 if (bP == 1 && bW == 0) {
22112 DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
22113 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22115 else if (bP == 1 && bW == 1) {
22116 DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
22117 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22119 else {
22120 vassert(bP == 0 && bW == 1);
22121 DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
22122 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22125 goto decode_success;
22129 /* -------------- (T3) Bcond.W label -------------- */
22130 /* This variant carries its own condition, so can't be part of an
22131 IT block ... */
22132 if (INSN0(15,11) == BITS5(1,1,1,1,0)
22133 && INSN1(15,14) == BITS2(1,0)
22134 && INSN1(12,12) == 0) {
22135 UInt cond = INSN0(9,6);
22136 if (cond != ARMCondAL && cond != ARMCondNV) {
22137 UInt uimm21
22138 = (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
22139 | (INSN1(11,11) << (1 + 6 + 11 + 1))
22140 | (INSN1(13,13) << (6 + 11 + 1))
22141 | (INSN0(5,0) << (11 + 1))
22142 | (INSN1(10,0) << 1);
22143 uimm21 <<= 11;
22144 Int simm21 = (Int)uimm21;
22145 simm21 >>= 11;
22147 vassert(0 == (guest_R15_curr_instr_notENC & 1));
22148 UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
22150 /* Not allowed in an IT block; SIGILL if so. */
22151 gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
22153 IRTemp kondT = newTemp(Ity_I32);
22154 assign( kondT, mk_armg_calculate_condition(cond) );
22155 stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
22156 Ijk_Boring,
22157 IRConst_U32(dst | 1/*CPSR.T*/),
22158 OFFB_R15T ));
22159 llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
22160 | 1 /*CPSR.T*/ ));
22161 dres.jk_StopHere = Ijk_Boring;
22162 dres.whatNext = Dis_StopHere;
22163 DIP("b%s.w 0x%x\n", nCC(cond), dst);
22164 goto decode_success;
22168 /* ---------------- (T4) B.W label ---------------- */
22169 /* ... whereas this variant doesn't carry its own condition, so it
22170 has to be either unconditional or the conditional by virtue of
22171 being the last in an IT block. The upside is that there's 4
22172 more bits available for the jump offset, so it has a 16-times
22173 greater branch range than the T3 variant. */
22174 if (INSN0(15,11) == BITS5(1,1,1,1,0)
22175 && INSN1(15,14) == BITS2(1,0)
22176 && INSN1(12,12) == 1) {
22177 if (1) {
22178 UInt bS = INSN0(10,10);
22179 UInt bJ1 = INSN1(13,13);
22180 UInt bJ2 = INSN1(11,11);
22181 UInt bI1 = 1 ^ (bJ1 ^ bS);
22182 UInt bI2 = 1 ^ (bJ2 ^ bS);
22183 UInt uimm25
22184 = (bS << (1 + 1 + 10 + 11 + 1))
22185 | (bI1 << (1 + 10 + 11 + 1))
22186 | (bI2 << (10 + 11 + 1))
22187 | (INSN0(9,0) << (11 + 1))
22188 | (INSN1(10,0) << 1);
22189 uimm25 <<= 7;
22190 Int simm25 = (Int)uimm25;
22191 simm25 >>= 7;
22193 vassert(0 == (guest_R15_curr_instr_notENC & 1));
22194 UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
22196 /* If in an IT block, must be the last insn. */
22197 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
22199 // go uncond
22200 mk_skip_over_T32_if_cond_is_false(condT);
22201 condT = IRTemp_INVALID;
22202 // now uncond
22204 // branch to dst
22205 llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
22206 dres.jk_StopHere = Ijk_Boring;
22207 dres.whatNext = Dis_StopHere;
22208 DIP("b.w 0x%x\n", dst);
22209 goto decode_success;
22213 /* ------------------ TBB, TBH ------------------ */
22214 if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
22215 UInt rN = INSN0(3,0);
22216 UInt rM = INSN1(3,0);
22217 UInt bH = INSN1(4,4);
22218 if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
22219 /* Must be last or not-in IT block */
22220 gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
22221 /* Go uncond */
22222 mk_skip_over_T32_if_cond_is_false(condT);
22223 condT = IRTemp_INVALID;
22225 IRExpr* ea
22226 = binop(Iop_Add32,
22227 getIRegT(rN),
22228 bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
22229 : getIRegT(rM));
22231 IRTemp delta = newTemp(Ity_I32);
22232 if (bH) {
22233 assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
22234 } else {
22235 assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
22238 llPutIReg(
22240 binop(Iop_Or32,
22241 binop(Iop_Add32,
22242 getIRegT(15),
22243 binop(Iop_Shl32, mkexpr(delta), mkU8(1))
22245 mkU32(1)
22247 dres.jk_StopHere = Ijk_Boring;
22248 dres.whatNext = Dis_StopHere;
22249 DIP("tb%c [r%u, r%u%s]\n",
22250 bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
22251 goto decode_success;
22255 /* ------------------ UBFX ------------------ */
22256 /* ------------------ SBFX ------------------ */
22257 /* There's also ARM versions of same, but it doesn't seem worth the
22258 hassle to common up the handling (it's only a couple of C
22259 statements). */
22260 if ((INSN0(15,4) == 0xF3C // UBFX
22261 || INSN0(15,4) == 0xF34) // SBFX
22262 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
22263 UInt rN = INSN0(3,0);
22264 UInt rD = INSN1(11,8);
22265 UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
22266 UInt wm1 = INSN1(4,0);
22267 UInt msb = lsb + wm1;
22268 if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
22269 Bool isU = INSN0(15,4) == 0xF3C;
22270 IRTemp src = newTemp(Ity_I32);
22271 IRTemp tmp = newTemp(Ity_I32);
22272 IRTemp res = newTemp(Ity_I32);
22273 UInt mask = ((1 << wm1) - 1) + (1 << wm1);
22274 vassert(msb >= 0 && msb <= 31);
22275 vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
22277 assign(src, getIRegT(rN));
22278 assign(tmp, binop(Iop_And32,
22279 binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
22280 mkU32(mask)));
22281 assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
22282 binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
22283 mkU8(31-wm1)));
22285 putIRegT(rD, mkexpr(res), condT);
22287 DIP("%s r%u, r%u, #%u, #%u\n",
22288 isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
22289 goto decode_success;
22293 /* ------------------ UXTB ------------------ */
22294 /* ------------------ UXTH ------------------ */
22295 /* ------------------ SXTB ------------------ */
22296 /* ------------------ SXTH ------------------ */
22297 /* ----------------- UXTB16 ----------------- */
22298 /* ----------------- SXTB16 ----------------- */
22299 /* FIXME: this is an exact duplicate of the ARM version. They
22300 should be commoned up. */
22301 if ((INSN0(15,0) == 0xFA5F // UXTB
22302 || INSN0(15,0) == 0xFA1F // UXTH
22303 || INSN0(15,0) == 0xFA4F // SXTB
22304 || INSN0(15,0) == 0xFA0F // SXTH
22305 || INSN0(15,0) == 0xFA3F // UXTB16
22306 || INSN0(15,0) == 0xFA2F) // SXTB16
22307 && INSN1(15,12) == BITS4(1,1,1,1)
22308 && INSN1(7,6) == BITS2(1,0)) {
22309 UInt rD = INSN1(11,8);
22310 UInt rM = INSN1(3,0);
22311 UInt rot = INSN1(5,4);
22312 if (!isBadRegT(rD) && !isBadRegT(rM)) {
22313 const HChar* nm = "???";
22314 IRTemp srcT = newTemp(Ity_I32);
22315 IRTemp rotT = newTemp(Ity_I32);
22316 IRTemp dstT = newTemp(Ity_I32);
22317 assign(srcT, getIRegT(rM));
22318 assign(rotT, genROR32(srcT, 8 * rot));
22319 switch (INSN0(15,0)) {
22320 case 0xFA5F: // UXTB
22321 nm = "uxtb";
22322 assign(dstT, unop(Iop_8Uto32,
22323 unop(Iop_32to8, mkexpr(rotT))));
22324 break;
22325 case 0xFA1F: // UXTH
22326 nm = "uxth";
22327 assign(dstT, unop(Iop_16Uto32,
22328 unop(Iop_32to16, mkexpr(rotT))));
22329 break;
22330 case 0xFA4F: // SXTB
22331 nm = "sxtb";
22332 assign(dstT, unop(Iop_8Sto32,
22333 unop(Iop_32to8, mkexpr(rotT))));
22334 break;
22335 case 0xFA0F: // SXTH
22336 nm = "sxth";
22337 assign(dstT, unop(Iop_16Sto32,
22338 unop(Iop_32to16, mkexpr(rotT))));
22339 break;
22340 case 0xFA3F: // UXTB16
22341 nm = "uxtb16";
22342 assign(dstT, binop(Iop_And32, mkexpr(rotT),
22343 mkU32(0x00FF00FF)));
22344 break;
22345 case 0xFA2F: { // SXTB16
22346 nm = "sxtb16";
22347 IRTemp lo32 = newTemp(Ity_I32);
22348 IRTemp hi32 = newTemp(Ity_I32);
22349 assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
22350 assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
22351 assign(
22352 dstT,
22353 binop(Iop_Or32,
22354 binop(Iop_And32,
22355 unop(Iop_8Sto32,
22356 unop(Iop_32to8, mkexpr(lo32))),
22357 mkU32(0xFFFF)),
22358 binop(Iop_Shl32,
22359 unop(Iop_8Sto32,
22360 unop(Iop_32to8, mkexpr(hi32))),
22361 mkU8(16))
22363 break;
22365 default:
22366 vassert(0);
22368 putIRegT(rD, mkexpr(dstT), condT);
22369 DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
22370 goto decode_success;
22374 /* -------------- MUL.W Rd, Rn, Rm -------------- */
22375 if (INSN0(15,4) == 0xFB0
22376 && (INSN1(15,0) & 0xF0F0) == 0xF000) {
22377 UInt rN = INSN0(3,0);
22378 UInt rD = INSN1(11,8);
22379 UInt rM = INSN1(3,0);
22380 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22381 IRTemp res = newTemp(Ity_I32);
22382 assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
22383 putIRegT(rD, mkexpr(res), condT);
22384 DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
22385 goto decode_success;
22389 /* -------------- SDIV.W Rd, Rn, Rm -------------- */
22390 if (INSN0(15,4) == 0xFB9
22391 && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
22392 UInt rN = INSN0(3,0);
22393 UInt rD = INSN1(11,8);
22394 UInt rM = INSN1(3,0);
22395 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22396 IRTemp res = newTemp(Ity_I32);
22397 IRTemp argL = newTemp(Ity_I32);
22398 IRTemp argR = newTemp(Ity_I32);
22399 assign(argL, getIRegT(rN));
22400 assign(argR, getIRegT(rM));
22401 assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
22402 putIRegT(rD, mkexpr(res), condT);
22403 DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
22404 goto decode_success;
22408 /* -------------- UDIV.W Rd, Rn, Rm -------------- */
22409 if (INSN0(15,4) == 0xFBB
22410 && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
22411 UInt rN = INSN0(3,0);
22412 UInt rD = INSN1(11,8);
22413 UInt rM = INSN1(3,0);
22414 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22415 IRTemp res = newTemp(Ity_I32);
22416 IRTemp argL = newTemp(Ity_I32);
22417 IRTemp argR = newTemp(Ity_I32);
22418 assign(argL, getIRegT(rN));
22419 assign(argR, getIRegT(rM));
22420 assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
22421 putIRegT(rD, mkexpr(res), condT);
22422 DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
22423 goto decode_success;
22427 /* ------------------ {U,S}MULL ------------------ */
22428 if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
22429 && INSN1(7,4) == BITS4(0,0,0,0)) {
22430 UInt isU = INSN0(5,5);
22431 UInt rN = INSN0(3,0);
22432 UInt rDlo = INSN1(15,12);
22433 UInt rDhi = INSN1(11,8);
22434 UInt rM = INSN1(3,0);
22435 if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
22436 && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
22437 IRTemp res = newTemp(Ity_I64);
22438 assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
22439 getIRegT(rN), getIRegT(rM)));
22440 putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
22441 putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
22442 DIP("%cmull r%u, r%u, r%u, r%u\n",
22443 isU ? 'u' : 's', rDlo, rDhi, rN, rM);
22444 goto decode_success;
22448 /* ------------------ ML{A,S} ------------------ */
22449 if (INSN0(15,4) == 0xFB0
22450 && ( INSN1(7,4) == BITS4(0,0,0,0) // MLA
22451 || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
22452 UInt rN = INSN0(3,0);
22453 UInt rA = INSN1(15,12);
22454 UInt rD = INSN1(11,8);
22455 UInt rM = INSN1(3,0);
22456 if (!isBadRegT(rD) && !isBadRegT(rN)
22457 && !isBadRegT(rM) && !isBadRegT(rA)) {
22458 Bool isMLA = INSN1(7,4) == BITS4(0,0,0,0);
22459 IRTemp res = newTemp(Ity_I32);
22460 assign(res,
22461 binop(isMLA ? Iop_Add32 : Iop_Sub32,
22462 getIRegT(rA),
22463 binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
22464 putIRegT(rD, mkexpr(res), condT);
22465 DIP("%s r%u, r%u, r%u, r%u\n",
22466 isMLA ? "mla" : "mls", rD, rN, rM, rA);
22467 goto decode_success;
22471 /* ------------------ (T3) ADR ------------------ */
22472 if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
22473 && INSN1(15,15) == 0) {
22474 /* rD = align4(PC) + imm32 */
22475 UInt rD = INSN1(11,8);
22476 if (!isBadRegT(rD)) {
22477 UInt imm32 = (INSN0(10,10) << 11)
22478 | (INSN1(14,12) << 8) | INSN1(7,0);
22479 putIRegT(rD, binop(Iop_Add32,
22480 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
22481 mkU32(imm32)),
22482 condT);
22483 DIP("add r%u, pc, #%u\n", rD, imm32);
22484 goto decode_success;
22488 /* ----------------- (T1) UMLAL ----------------- */
22489 /* ----------------- (T1) SMLAL ----------------- */
22490 if ((INSN0(15,4) == 0xFBE // UMLAL
22491 || INSN0(15,4) == 0xFBC) // SMLAL
22492 && INSN1(7,4) == BITS4(0,0,0,0)) {
22493 UInt rN = INSN0(3,0);
22494 UInt rDlo = INSN1(15,12);
22495 UInt rDhi = INSN1(11,8);
22496 UInt rM = INSN1(3,0);
22497 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
22498 && !isBadRegT(rM) && rDhi != rDlo) {
22499 Bool isS = INSN0(15,4) == 0xFBC;
22500 IRTemp argL = newTemp(Ity_I32);
22501 IRTemp argR = newTemp(Ity_I32);
22502 IRTemp old = newTemp(Ity_I64);
22503 IRTemp res = newTemp(Ity_I64);
22504 IRTemp resHi = newTemp(Ity_I32);
22505 IRTemp resLo = newTemp(Ity_I32);
22506 IROp mulOp = isS ? Iop_MullS32 : Iop_MullU32;
22507 assign( argL, getIRegT(rM));
22508 assign( argR, getIRegT(rN));
22509 assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
22510 assign( res, binop(Iop_Add64,
22511 mkexpr(old),
22512 binop(mulOp, mkexpr(argL), mkexpr(argR))) );
22513 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
22514 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
22515 putIRegT( rDhi, mkexpr(resHi), condT );
22516 putIRegT( rDlo, mkexpr(resLo), condT );
22517 DIP("%cmlal r%u, r%u, r%u, r%u\n",
22518 isS ? 's' : 'u', rDlo, rDhi, rN, rM);
22519 goto decode_success;
22523 /* ------------------ (T1) UMAAL ------------------ */
22524 if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
22525 UInt rN = INSN0(3,0);
22526 UInt rDlo = INSN1(15,12);
22527 UInt rDhi = INSN1(11,8);
22528 UInt rM = INSN1(3,0);
22529 if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
22530 && !isBadRegT(rM) && rDhi != rDlo) {
22531 IRTemp argN = newTemp(Ity_I32);
22532 IRTemp argM = newTemp(Ity_I32);
22533 IRTemp argDhi = newTemp(Ity_I32);
22534 IRTemp argDlo = newTemp(Ity_I32);
22535 IRTemp res = newTemp(Ity_I64);
22536 IRTemp resHi = newTemp(Ity_I32);
22537 IRTemp resLo = newTemp(Ity_I32);
22538 assign( argN, getIRegT(rN) );
22539 assign( argM, getIRegT(rM) );
22540 assign( argDhi, getIRegT(rDhi) );
22541 assign( argDlo, getIRegT(rDlo) );
22542 assign( res,
22543 binop(Iop_Add64,
22544 binop(Iop_Add64,
22545 binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
22546 unop(Iop_32Uto64, mkexpr(argDhi))),
22547 unop(Iop_32Uto64, mkexpr(argDlo))) );
22548 assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
22549 assign( resLo, unop(Iop_64to32, mkexpr(res)) );
22550 putIRegT( rDhi, mkexpr(resHi), condT );
22551 putIRegT( rDlo, mkexpr(resLo), condT );
22552 DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
22553 goto decode_success;
22557 /* ------------------- (T1) SMMUL{R} ------------------ */
22558 if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
22559 && INSN0(6,4) == BITS3(1,0,1)
22560 && INSN1(15,12) == BITS4(1,1,1,1)
22561 && INSN1(7,5) == BITS3(0,0,0)) {
22562 UInt bitR = INSN1(4,4);
22563 UInt rD = INSN1(11,8);
22564 UInt rM = INSN1(3,0);
22565 UInt rN = INSN0(3,0);
22566 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22567 IRExpr* res
22568 = unop(Iop_64HIto32,
22569 binop(Iop_Add64,
22570 binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
22571 mkU64(bitR ? 0x80000000ULL : 0ULL)));
22572 putIRegT(rD, res, condT);
22573 DIP("smmul%s r%u, r%u, r%u\n",
22574 bitR ? "r" : "", rD, rN, rM);
22575 goto decode_success;
22579 /* ------------------- (T1) SMMLA{R} ------------------ */
22580 if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
22581 && INSN0(6,4) == BITS3(1,0,1)
22582 && INSN1(7,5) == BITS3(0,0,0)) {
22583 UInt bitR = INSN1(4,4);
22584 UInt rA = INSN1(15,12);
22585 UInt rD = INSN1(11,8);
22586 UInt rM = INSN1(3,0);
22587 UInt rN = INSN0(3,0);
22588 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
22589 IRExpr* res
22590 = unop(Iop_64HIto32,
22591 binop(Iop_Add64,
22592 binop(Iop_Add64,
22593 binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
22594 binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
22595 mkU64(bitR ? 0x80000000ULL : 0ULL)));
22596 putIRegT(rD, res, condT);
22597 DIP("smmla%s r%u, r%u, r%u, r%u\n",
22598 bitR ? "r" : "", rD, rN, rM, rA);
22599 goto decode_success;
22603 /* ------------------ (T2) ADR ------------------ */
22604 if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
22605 && INSN1(15,15) == 0) {
22606 /* rD = align4(PC) - imm32 */
22607 UInt rD = INSN1(11,8);
22608 if (!isBadRegT(rD)) {
22609 UInt imm32 = (INSN0(10,10) << 11)
22610 | (INSN1(14,12) << 8) | INSN1(7,0);
22611 putIRegT(rD, binop(Iop_Sub32,
22612 binop(Iop_And32, getIRegT(15), mkU32(~3U)),
22613 mkU32(imm32)),
22614 condT);
22615 DIP("sub r%u, pc, #%u\n", rD, imm32);
22616 goto decode_success;
22620 /* ------------------- (T1) BFI ------------------- */
22621 /* ------------------- (T1) BFC ------------------- */
22622 if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
22623 UInt rD = INSN1(11,8);
22624 UInt rN = INSN0(3,0);
22625 UInt msb = INSN1(4,0);
22626 UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
22627 if (isBadRegT(rD) || rN == 13 || msb < lsb) {
22628 /* undecodable; fall through */
22629 } else {
22630 IRTemp src = newTemp(Ity_I32);
22631 IRTemp olddst = newTemp(Ity_I32);
22632 IRTemp newdst = newTemp(Ity_I32);
22633 UInt mask = ((UInt)1) << (msb - lsb);
22634 mask = (mask - 1) + mask;
22635 vassert(mask != 0); // guaranteed by "msb < lsb" check above
22636 mask <<= lsb;
22638 assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
22639 assign(olddst, getIRegT(rD));
22640 assign(newdst,
22641 binop(Iop_Or32,
22642 binop(Iop_And32,
22643 binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
22644 mkU32(mask)),
22645 binop(Iop_And32,
22646 mkexpr(olddst),
22647 mkU32(~mask)))
22650 putIRegT(rD, mkexpr(newdst), condT);
22652 if (rN == 15) {
22653 DIP("bfc r%u, #%u, #%u\n",
22654 rD, lsb, msb-lsb+1);
22655 } else {
22656 DIP("bfi r%u, r%u, #%u, #%u\n",
22657 rD, rN, lsb, msb-lsb+1);
22659 goto decode_success;
22663 /* ------------------- (T1) SXTAH ------------------- */
22664 /* ------------------- (T1) UXTAH ------------------- */
22665 if ((INSN0(15,4) == 0xFA1 // UXTAH
22666 || INSN0(15,4) == 0xFA0) // SXTAH
22667 && INSN1(15,12) == BITS4(1,1,1,1)
22668 && INSN1(7,6) == BITS2(1,0)) {
22669 Bool isU = INSN0(15,4) == 0xFA1;
22670 UInt rN = INSN0(3,0);
22671 UInt rD = INSN1(11,8);
22672 UInt rM = INSN1(3,0);
22673 UInt rot = INSN1(5,4);
22674 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22675 IRTemp srcL = newTemp(Ity_I32);
22676 IRTemp srcR = newTemp(Ity_I32);
22677 IRTemp res = newTemp(Ity_I32);
22678 assign(srcR, getIRegT(rM));
22679 assign(srcL, getIRegT(rN));
22680 assign(res, binop(Iop_Add32,
22681 mkexpr(srcL),
22682 unop(isU ? Iop_16Uto32 : Iop_16Sto32,
22683 unop(Iop_32to16,
22684 genROR32(srcR, 8 * rot)))));
22685 putIRegT(rD, mkexpr(res), condT);
22686 DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
22687 isU ? 'u' : 's', rD, rN, rM, rot);
22688 goto decode_success;
22692 /* ------------------- (T1) SXTAB ------------------- */
22693 /* ------------------- (T1) UXTAB ------------------- */
22694 if ((INSN0(15,4) == 0xFA5 // UXTAB
22695 || INSN0(15,4) == 0xFA4) // SXTAB
22696 && INSN1(15,12) == BITS4(1,1,1,1)
22697 && INSN1(7,6) == BITS2(1,0)) {
22698 Bool isU = INSN0(15,4) == 0xFA5;
22699 UInt rN = INSN0(3,0);
22700 UInt rD = INSN1(11,8);
22701 UInt rM = INSN1(3,0);
22702 UInt rot = INSN1(5,4);
22703 if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22704 IRTemp srcL = newTemp(Ity_I32);
22705 IRTemp srcR = newTemp(Ity_I32);
22706 IRTemp res = newTemp(Ity_I32);
22707 assign(srcR, getIRegT(rM));
22708 assign(srcL, getIRegT(rN));
22709 assign(res, binop(Iop_Add32,
22710 mkexpr(srcL),
22711 unop(isU ? Iop_8Uto32 : Iop_8Sto32,
22712 unop(Iop_32to8,
22713 genROR32(srcR, 8 * rot)))));
22714 putIRegT(rD, mkexpr(res), condT);
22715 DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
22716 isU ? 'u' : 's', rD, rN, rM, rot);
22717 goto decode_success;
22721 /* ------------------- (T1) CLZ ------------------- */
22722 if (INSN0(15,4) == 0xFAB
22723 && INSN1(15,12) == BITS4(1,1,1,1)
22724 && INSN1(7,4) == BITS4(1,0,0,0)) {
22725 UInt rM1 = INSN0(3,0);
22726 UInt rD = INSN1(11,8);
22727 UInt rM2 = INSN1(3,0);
22728 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22729 IRTemp arg = newTemp(Ity_I32);
22730 IRTemp res = newTemp(Ity_I32);
22731 assign(arg, getIRegT(rM1));
22732 assign(res, IRExpr_ITE(
22733 binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
22734 mkU32(32),
22735 unop(Iop_Clz32, mkexpr(arg))
22737 putIRegT(rD, mkexpr(res), condT);
22738 DIP("clz r%u, r%u\n", rD, rM1);
22739 goto decode_success;
22743 /* ------------------- (T1) RBIT ------------------- */
22744 if (INSN0(15,4) == 0xFA9
22745 && INSN1(15,12) == BITS4(1,1,1,1)
22746 && INSN1(7,4) == BITS4(1,0,1,0)) {
22747 UInt rM1 = INSN0(3,0);
22748 UInt rD = INSN1(11,8);
22749 UInt rM2 = INSN1(3,0);
22750 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22751 IRTemp arg = newTemp(Ity_I32);
22752 assign(arg, getIRegT(rM1));
22753 IRTemp res = gen_BITREV(arg);
22754 putIRegT(rD, mkexpr(res), condT);
22755 DIP("rbit r%u, r%u\n", rD, rM1);
22756 goto decode_success;
22760 /* ------------------- (T2) REV ------------------- */
22761 /* ------------------- (T2) REV16 ------------------- */
22762 if (INSN0(15,4) == 0xFA9
22763 && INSN1(15,12) == BITS4(1,1,1,1)
22764 && ( INSN1(7,4) == BITS4(1,0,0,0) // REV
22765 || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
22766 UInt rM1 = INSN0(3,0);
22767 UInt rD = INSN1(11,8);
22768 UInt rM2 = INSN1(3,0);
22769 Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
22770 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22771 IRTemp arg = newTemp(Ity_I32);
22772 assign(arg, getIRegT(rM1));
22773 IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
22774 putIRegT(rD, mkexpr(res), condT);
22775 DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
22776 goto decode_success;
22780 /* ------------------- (T2) REVSH ------------------ */
22781 if (INSN0(15,4) == 0xFA9
22782 && INSN1(15,12) == BITS4(1,1,1,1)
22783 && INSN1(7,4) == BITS4(1,0,1,1)) {
22784 UInt rM1 = INSN0(3,0);
22785 UInt rM2 = INSN1(3,0);
22786 UInt rD = INSN1(11,8);
22787 if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22788 IRTemp irt_rM = newTemp(Ity_I32);
22789 IRTemp irt_hi = newTemp(Ity_I32);
22790 IRTemp irt_low = newTemp(Ity_I32);
22791 IRTemp irt_res = newTemp(Ity_I32);
22792 assign(irt_rM, getIRegT(rM1));
22793 assign(irt_hi,
22794 binop(Iop_Sar32,
22795 binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
22796 mkU8(16)
22799 assign(irt_low,
22800 binop(Iop_And32,
22801 binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
22802 mkU32(0xFF)
22805 assign(irt_res,
22806 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
22808 putIRegT(rD, mkexpr(irt_res), condT);
22809 DIP("revsh r%u, r%u\n", rD, rM1);
22810 goto decode_success;
22814 /* -------------- (T1) MSR apsr, reg -------------- */
22815 if (INSN0(15,4) == 0xF38
22816 && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
22817 UInt rN = INSN0(3,0);
22818 UInt write_ge = INSN1(10,10);
22819 UInt write_nzcvq = INSN1(11,11);
22820 if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
22821 IRTemp rNt = newTemp(Ity_I32);
22822 assign(rNt, getIRegT(rN));
22823 desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
22824 DIP("msr cpsr_%s%s, r%u\n",
22825 write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
22826 goto decode_success;
22830 /* -------------- (T1) MRS reg, apsr -------------- */
22831 if (INSN0(15,0) == 0xF3EF
22832 && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
22833 UInt rD = INSN1(11,8);
22834 if (!isBadRegT(rD)) {
22835 IRTemp apsr = synthesise_APSR();
22836 putIRegT( rD, mkexpr(apsr), condT );
22837 DIP("mrs r%u, cpsr\n", rD);
22838 goto decode_success;
22842 /* ----------------- (T1) LDREX ----------------- */
22843 if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
22844 UInt rN = INSN0(3,0);
22845 UInt rT = INSN1(15,12);
22846 UInt imm8 = INSN1(7,0);
22847 if (!isBadRegT(rT) && rN != 15) {
22848 IRTemp res;
22849 // go uncond
22850 mk_skip_over_T32_if_cond_is_false( condT );
22851 // now uncond
22852 res = newTemp(Ity_I32);
22853 stmt( IRStmt_LLSC(Iend_LE,
22854 res,
22855 binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
22856 NULL/*this is a load*/ ));
22857 putIRegT(rT, mkexpr(res), IRTemp_INVALID);
22858 DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
22859 goto decode_success;
22863 /* --------------- (T1) LDREX{B,H} --------------- */
22864 if (INSN0(15,4) == 0xE8D
22865 && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
22866 UInt rN = INSN0(3,0);
22867 UInt rT = INSN1(15,12);
22868 Bool isH = INSN1(11,0) == 0xF5F;
22869 if (!isBadRegT(rT) && rN != 15) {
22870 IRTemp res;
22871 // go uncond
22872 mk_skip_over_T32_if_cond_is_false( condT );
22873 // now uncond
22874 res = newTemp(isH ? Ity_I16 : Ity_I8);
22875 stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
22876 NULL/*this is a load*/ ));
22877 putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
22878 IRTemp_INVALID);
22879 DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
22880 goto decode_success;
22884 /* --------------- (T1) LDREXD --------------- */
22885 if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
22886 UInt rN = INSN0(3,0);
22887 UInt rT = INSN1(15,12);
22888 UInt rT2 = INSN1(11,8);
22889 if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
22890 IRTemp res;
22891 // go uncond
22892 mk_skip_over_T32_if_cond_is_false( condT );
22893 // now uncond
22894 res = newTemp(Ity_I64);
22895 // FIXME: assumes little-endian guest
22896 stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
22897 NULL/*this is a load*/ ));
22898 // FIXME: assumes little-endian guest
22899 putIRegT(rT, unop(Iop_64to32, mkexpr(res)), IRTemp_INVALID);
22900 putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
22901 DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
22902 goto decode_success;
22906 /* ----------------- (T1) STREX ----------------- */
22907 if (INSN0(15,4) == 0xE84) {
22908 UInt rN = INSN0(3,0);
22909 UInt rT = INSN1(15,12);
22910 UInt rD = INSN1(11,8);
22911 UInt imm8 = INSN1(7,0);
22912 if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
22913 && rD != rN && rD != rT) {
22914 IRTemp resSC1, resSC32;
22915 // go uncond
22916 mk_skip_over_T32_if_cond_is_false( condT );
22917 // now uncond
22918 /* Ok, now we're unconditional. Do the store. */
22919 resSC1 = newTemp(Ity_I1);
22920 stmt( IRStmt_LLSC(Iend_LE,
22921 resSC1,
22922 binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
22923 getIRegT(rT)) );
22924 /* Set rD to 1 on failure, 0 on success. Currently we have
22925 resSC1 == 0 on failure, 1 on success. */
22926 resSC32 = newTemp(Ity_I32);
22927 assign(resSC32,
22928 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
22929 putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
22930 DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
22931 goto decode_success;
22935 /* --------------- (T1) STREX{B,H} --------------- */
22936 if (INSN0(15,4) == 0xE8C
22937 && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
22938 UInt rN = INSN0(3,0);
22939 UInt rT = INSN1(15,12);
22940 UInt rD = INSN1(3,0);
22941 Bool isH = INSN1(11,4) == 0xF5;
22942 if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
22943 && rD != rN && rD != rT) {
22944 IRTemp resSC1, resSC32;
22945 // go uncond
22946 mk_skip_over_T32_if_cond_is_false( condT );
22947 // now uncond
22948 /* Ok, now we're unconditional. Do the store. */
22949 resSC1 = newTemp(Ity_I1);
22950 stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
22951 unop(isH ? Iop_32to16 : Iop_32to8,
22952 getIRegT(rT))) );
22953 /* Set rD to 1 on failure, 0 on success. Currently we have
22954 resSC1 == 0 on failure, 1 on success. */
22955 resSC32 = newTemp(Ity_I32);
22956 assign(resSC32,
22957 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
22958 putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
22959 DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
22960 goto decode_success;
22964 /* ---------------- (T1) STREXD ---------------- */
22965 if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
22966 UInt rN = INSN0(3,0);
22967 UInt rT = INSN1(15,12);
22968 UInt rT2 = INSN1(11,8);
22969 UInt rD = INSN1(3,0);
22970 if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
22971 && rN != 15 && rD != rN && rD != rT && rD != rT2) {
22972 IRTemp resSC1, resSC32, data;
22973 // go uncond
22974 mk_skip_over_T32_if_cond_is_false( condT );
22975 // now uncond
22976 /* Ok, now we're unconditional. Do the store. */
22977 resSC1 = newTemp(Ity_I1);
22978 data = newTemp(Ity_I64);
22979 // FIXME: assumes little-endian guest
22980 assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
22981 // FIXME: assumes little-endian guest
22982 stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
22983 /* Set rD to 1 on failure, 0 on success. Currently we have
22984 resSC1 == 0 on failure, 1 on success. */
22985 resSC32 = newTemp(Ity_I32);
22986 assign(resSC32,
22987 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
22988 putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
22989 DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
22990 goto decode_success;
22994 /* -------------- v7 barrier insns -------------- */
22995 if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
22996 /* FIXME: should this be unconditional? */
22997 /* XXX this isn't really right, is it? The generated IR does
22998 them unconditionally. I guess it doesn't matter since it
22999 doesn't do any harm to do them even when the guarding
23000 condition is false -- it's just a performance loss. */
23001 switch (INSN1(7,0)) {
23002 case 0x4F: /* DSB sy */
23003 case 0x4E: /* DSB st */
23004 case 0x4B: /* DSB ish */
23005 case 0x4A: /* DSB ishst */
23006 case 0x47: /* DSB nsh */
23007 case 0x46: /* DSB nshst */
23008 case 0x43: /* DSB osh */
23009 case 0x42: /* DSB oshst */
23010 stmt( IRStmt_MBE(Imbe_Fence) );
23011 DIP("DSB\n");
23012 goto decode_success;
23013 case 0x5F: /* DMB sy */
23014 case 0x5E: /* DMB st */
23015 case 0x5B: /* DMB ish */
23016 case 0x5A: /* DMB ishst */
23017 case 0x57: /* DMB nsh */
23018 case 0x56: /* DMB nshst */
23019 case 0x53: /* DMB osh */
23020 case 0x52: /* DMB oshst */
23021 stmt( IRStmt_MBE(Imbe_Fence) );
23022 DIP("DMB\n");
23023 goto decode_success;
23024 case 0x6F: /* ISB */
23025 stmt( IRStmt_MBE(Imbe_Fence) );
23026 DIP("ISB\n");
23027 goto decode_success;
23028 default:
23029 break;
23033 /* ---------------------- PLD{,W} ---------------------- */
23034 if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
23035 /* FIXME: should this be unconditional? */
23036 /* PLD/PLDW immediate, encoding T1 */
23037 UInt rN = INSN0(3,0);
23038 UInt bW = INSN0(5,5);
23039 UInt imm12 = INSN1(11,0);
23040 DIP("pld%s [r%u, #%u]\n", bW ? "w" : "", rN, imm12);
23041 goto decode_success;
23044 if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
23045 /* FIXME: should this be unconditional? */
23046 /* PLD/PLDW immediate, encoding T2 */
23047 UInt rN = INSN0(3,0);
23048 UInt bW = INSN0(5,5);
23049 UInt imm8 = INSN1(7,0);
23050 DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "", rN, imm8);
23051 goto decode_success;
23054 if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
23055 /* FIXME: should this be unconditional? */
23056 /* PLD/PLDW register, encoding T1 */
23057 UInt rN = INSN0(3,0);
23058 UInt rM = INSN1(3,0);
23059 UInt bW = INSN0(5,5);
23060 UInt imm2 = INSN1(5,4);
23061 if (!isBadRegT(rM)) {
23062 DIP("pld%s [r%u, r%u, lsl %u]\n", bW ? "w" : "", rN, rM, imm2);
23063 goto decode_success;
23065 /* fall through */
23068 /* -------------- read CP15 TPIDRURO register ------------- */
23069 /* mrc p15, 0, r0, c13, c0, 3 up to
23070 mrc p15, 0, r14, c13, c0, 3
23072 /* I don't know whether this is really v7-only. But anyway, we
23073 have to support it since arm-linux uses TPIDRURO as a thread
23074 state register. */
23075 if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
23076 UInt rD = INSN1(15,12);
23077 if (!isBadRegT(rD)) {
23078 putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), condT);
23079 DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
23080 goto decode_success;
23082 /* fall through */
23085 /* ------------ read/write CP15 TPIDRURW register ----------- */
23086 /* mcr p15, 0, r0, c13, c0, 2 (r->cr xfer) up to
23087 mcr p15, 0, r14, c13, c0, 2
23089 mrc p15, 0, r0, c13, c0, 2 (rc->r xfer) up to
23090 mrc p15, 0, r14, c13, c0, 2
23092 if ((INSN0(15,0) == 0xEE0D) && (INSN1(11,0) == 0x0F50)) {
23093 UInt rS = INSN1(15,12);
23094 if (!isBadRegT(rS)) {
23095 putMiscReg32(OFFB_TPIDRURW, getIRegT(rS), condT);
23096 DIP("mcr p15,0, r%u, c13, c0, 2\n", rS);
23097 goto decode_success;
23099 /* fall through */
23101 if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F50)) {
23102 UInt rD = INSN1(15,12);
23103 if (!isBadRegT(rD)) {
23104 putIRegT(rD, IRExpr_Get(OFFB_TPIDRURW, Ity_I32), condT);
23105 DIP("mrc p15,0, r%u, c13, c0, 2\n", rD);
23106 goto decode_success;
23108 /* fall through */
23111 /* -------------- read CP15 PMUSRENR register ------------- */
23112 /* mrc p15, 0, r0, c9, c14, 0 up to
23113 mrc p15, 0, r14, c9, c14, 0
23114 See comment on the ARM equivalent of this (above) for details.
23116 if ((INSN0(15,0) == 0xEE19) && (INSN1(11,0) == 0x0F1E)) {
23117 UInt rD = INSN1(15,12);
23118 if (!isBadRegT(rD)) {
23119 putIRegT(rD, mkU32(0), condT);
23120 DIP("mrc p15,0, r%u, c9, c14, 0\n", rD);
23121 goto decode_success;
23123 /* fall through */
23126 /* ------------------- CLREX ------------------ */
23127 if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
23128 /* AFAICS, this simply cancels a (all?) reservations made by a
23129 (any?) preceding LDREX(es). Arrange to hand it through to
23130 the back end. */
23131 mk_skip_over_T32_if_cond_is_false( condT );
23132 stmt( IRStmt_MBE(Imbe_CancelReservation) );
23133 DIP("clrex\n");
23134 goto decode_success;
23137 /* ------------------- NOP ------------------ */
23138 if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
23139 DIP("nop\n");
23140 goto decode_success;
23143 /* -------------- (T1) LDRT reg+#imm8 -------------- */
23144 /* Load Register Unprivileged:
23145 ldrt Rt, [Rn, #imm8]
23147 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
23148 && INSN1(11,8) == BITS4(1,1,1,0)) {
23149 UInt rT = INSN1(15,12);
23150 UInt rN = INSN0(3,0);
23151 UInt imm8 = INSN1(7,0);
23152 Bool valid = True;
23153 if (rN == 15 || isBadRegT(rT)) valid = False;
23154 if (valid) {
23155 put_ITSTATE(old_itstate);
23156 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23157 IRTemp newRt = newTemp(Ity_I32);
23158 loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
23159 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23160 put_ITSTATE(new_itstate);
23161 DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
23162 goto decode_success;
23166 /* -------------- (T1) STRT reg+#imm8 -------------- */
23167 /* Store Register Unprivileged:
23168 strt Rt, [Rn, #imm8]
23170 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
23171 && INSN1(11,8) == BITS4(1,1,1,0)) {
23172 UInt rT = INSN1(15,12);
23173 UInt rN = INSN0(3,0);
23174 UInt imm8 = INSN1(7,0);
23175 Bool valid = True;
23176 if (rN == 15 || isBadRegT(rT)) valid = False;
23177 if (valid) {
23178 put_ITSTATE(old_itstate);
23179 IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23180 storeGuardedLE( address, llGetIReg(rT), condT );
23181 put_ITSTATE(new_itstate);
23182 DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
23183 goto decode_success;
23187 /* -------------- (T1) STRBT reg+#imm8 -------------- */
23188 /* Store Register Byte Unprivileged:
23189 strbt Rt, [Rn, #imm8]
23191 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
23192 && INSN1(11,8) == BITS4(1,1,1,0)) {
23193 UInt rT = INSN1(15,12);
23194 UInt rN = INSN0(3,0);
23195 UInt imm8 = INSN1(7,0);
23196 Bool valid = True;
23197 if (rN == 15 || isBadRegT(rT)) valid = False;
23198 if (valid) {
23199 put_ITSTATE(old_itstate);
23200 IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23201 IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
23202 storeGuardedLE( address, data, condT );
23203 put_ITSTATE(new_itstate);
23204 DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23205 goto decode_success;
23209 /* -------------- (T1) LDRHT reg+#imm8 -------------- */
23210 /* Load Register Halfword Unprivileged:
23211 ldrht Rt, [Rn, #imm8]
23213 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
23214 && INSN1(11,8) == BITS4(1,1,1,0)) {
23215 UInt rN = INSN0(3,0);
23216 Bool valid = True;
23217 if (rN == 15) {
23218 /* In this case our instruction is LDRH (literal), in fact:
23219 LDRH (literal) was realized earlier, so we don't want to
23220 make it twice. */
23221 valid = False;
23223 UInt rT = INSN1(15,12);
23224 UInt imm8 = INSN1(7,0);
23225 if (isBadRegT(rT)) valid = False;
23226 if (valid) {
23227 put_ITSTATE(old_itstate);
23228 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23229 IRTemp newRt = newTemp(Ity_I32);
23230 loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
23231 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23232 put_ITSTATE(new_itstate);
23233 DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
23234 goto decode_success;
23238 /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
23239 /* Load Register Signed Halfword Unprivileged:
23240 ldrsht Rt, [Rn, #imm8]
23242 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
23243 && INSN1(11,8) == BITS4(1,1,1,0)) {
23244 UInt rN = INSN0(3,0);
23245 Bool valid = True;
23246 if (rN == 15) {
23247 /* In this case our instruction is LDRSH (literal), in fact:
23248 LDRSH (literal) was realized earlier, so we don't want to
23249 make it twice. */
23250 valid = False;
23252 UInt rT = INSN1(15,12);
23253 UInt imm8 = INSN1(7,0);
23254 if (isBadRegT(rT)) valid = False;
23255 if (valid) {
23256 put_ITSTATE(old_itstate);
23257 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23258 IRTemp newRt = newTemp(Ity_I32);
23259 loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
23260 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23261 put_ITSTATE(new_itstate);
23262 DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
23263 goto decode_success;
23267 /* -------------- (T1) STRHT reg+#imm8 -------------- */
23268 /* Store Register Halfword Unprivileged:
23269 strht Rt, [Rn, #imm8]
23271 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
23272 && INSN1(11,8) == BITS4(1,1,1,0)) {
23273 UInt rT = INSN1(15,12);
23274 UInt rN = INSN0(3,0);
23275 UInt imm8 = INSN1(7,0);
23276 Bool valid = True;
23277 if (rN == 15 || isBadRegT(rT)) valid = False;
23278 if (valid) {
23279 put_ITSTATE(old_itstate);
23280 IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23281 IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
23282 storeGuardedLE( address, data, condT );
23283 put_ITSTATE(new_itstate);
23284 DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
23285 goto decode_success;
23289 /* -------------- (T1) LDRBT reg+#imm8 -------------- */
23290 /* Load Register Byte Unprivileged:
23291 ldrbt Rt, [Rn, #imm8]
23293 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
23294 && INSN1(11,8) == BITS4(1,1,1,0)) {
23295 UInt rN = INSN0(3,0);
23296 UInt rT = INSN1(15,12);
23297 UInt imm8 = INSN1(7,0);
23298 Bool valid = True;
23299 if (rN == 15 /* insn is LDRB (literal) */) valid = False;
23300 if (isBadRegT(rT)) valid = False;
23301 if (valid) {
23302 put_ITSTATE(old_itstate);
23303 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23304 IRTemp newRt = newTemp(Ity_I32);
23305 loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
23306 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23307 put_ITSTATE(new_itstate);
23308 DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23309 goto decode_success;
23313 /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
23314 /* Load Register Signed Byte Unprivileged:
23315 ldrsbt Rt, [Rn, #imm8]
23317 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
23318 && INSN1(11,8) == BITS4(1,1,1,0)) {
23319 UInt rN = INSN0(3,0);
23320 Bool valid = True;
23321 UInt rT = INSN1(15,12);
23322 UInt imm8 = INSN1(7,0);
23323 if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
23324 if (isBadRegT(rT)) valid = False;
23325 if (valid) {
23326 put_ITSTATE(old_itstate);
23327 IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23328 IRTemp newRt = newTemp(Ity_I32);
23329 loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
23330 putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23331 put_ITSTATE(new_itstate);
23332 DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23333 goto decode_success;
23337 /* -------------- (T1) PLI reg+#imm12 -------------- */
23338 /* Preload Instruction:
23339 pli [Rn, #imm12]
23341 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
23342 && INSN1(15,12) == BITS4(1,1,1,1)) {
23343 UInt rN = INSN0(3,0);
23344 UInt imm12 = INSN1(11,0);
23345 if (rN != 15) {
23346 DIP("pli [r%u, #%u]\n", rN, imm12);
23347 goto decode_success;
23351 /* -------------- (T2) PLI reg-#imm8 -------------- */
23352 /* Preload Instruction:
23353 pli [Rn, #-imm8]
23355 if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
23356 && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
23357 UInt rN = INSN0(3,0);
23358 UInt imm8 = INSN1(7,0);
23359 if (rN != 15) {
23360 DIP("pli [r%u, #-%u]\n", rN, imm8);
23361 goto decode_success;
23365 /* -------------- (T3) PLI PC+/-#imm12 -------------- */
23366 /* Preload Instruction:
23367 pli [PC, #+/-imm12]
23369 if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
23370 && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
23371 && INSN1(15,12) == BITS4(1,1,1,1)) {
23372 UInt imm12 = INSN1(11,0);
23373 UInt bU = INSN0(7,7);
23374 DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
23375 goto decode_success;
23378 /* ----------------------------------------------------------- */
23379 /* -- VFP (CP 10, CP 11) instructions (in Thumb mode) -- */
23380 /* ----------------------------------------------------------- */
23382 if (INSN0(15,12) == BITS4(1,1,1,0)) {
23383 UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
23384 Bool ok_vfp = decode_CP10_CP11_instruction (
23385 &dres, insn28, condT, ARMCondAL/*bogus*/,
23386 True/*isT*/
23388 if (ok_vfp)
23389 goto decode_success;
23392 /* ----------------------------------------------------------- */
23393 /* -- NEON instructions (only v7 and below, in Thumb mode) -- */
23394 /* ----------------------------------------------------------- */
23396 if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
23397 UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23398 Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
23399 &dres, insn32, condT, True/*isT*/
23401 if (ok_neon)
23402 goto decode_success;
23405 /* ----------------------------------------------------------- */
23406 /* -- v6 media instructions (in Thumb mode) -- */
23407 /* ----------------------------------------------------------- */
23409 { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23410 Bool ok_v6m = decode_V6MEDIA_instruction(
23411 &dres, insn32, condT, ARMCondAL/*bogus*/,
23412 True/*isT*/
23414 if (ok_v6m)
23415 goto decode_success;
23418 /* ----------------------------------------------------------- */
23419 /* -- v8 instructions (in Thumb mode) -- */
23420 /* ----------------------------------------------------------- */
23422 /* If we get here, it means that all attempts to decode the
23423 instruction as ARMv7 or earlier have failed. So, if we're doing
23424 ARMv8 or later, here is the point to try for it. */
23426 if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
23427 UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23428 Bool ok_v8
23429 = decode_V8_instruction( &dres, insn32, condT, True/*isT*/,
23430 old_itstate, new_itstate );
23431 if (ok_v8)
23432 goto decode_success;
23435 /* ----------------------------------------------------------- */
23436 /* -- Undecodable -- */
23437 /* ----------------------------------------------------------- */
23439 goto decode_failure;
23440 /*NOTREACHED*/
23442 decode_failure:
23443 /* All decode failures end up here. */
23444 if (sigill_diag)
23445 vex_printf("disInstr(thumb): unhandled instruction: "
23446 "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
23448 /* Back up ITSTATE to the initial value for this instruction.
23449 If we don't do that, any subsequent restart of the instruction
23450 will restart with the wrong value. */
23451 if (old_itstate != IRTemp_INVALID)
23452 put_ITSTATE(old_itstate);
23454 /* Tell the dispatcher that this insn cannot be decoded, and so has
23455 not been executed, and (is currently) the next to be executed.
23456 R15 should be up-to-date since it made so at the start of each
23457 insn, but nevertheless be paranoid and update it again right
23458 now. */
23459 vassert(0 == (guest_R15_curr_instr_notENC & 1));
23460 llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
23461 dres.len = 0;
23462 dres.whatNext = Dis_StopHere;
23463 dres.jk_StopHere = Ijk_NoDecode;
23464 return dres;
23466 decode_success:
23467 /* All decode successes end up here. */
23468 vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
23469 switch (dres.whatNext) {
23470 case Dis_Continue:
23471 llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
23472 break;
23473 case Dis_StopHere:
23474 break;
23475 default:
23476 vassert(0);
23479 DIP("\n");
23481 return dres;
23483 # undef INSN0
23484 # undef INSN1
23487 #undef DIP
23488 #undef DIS
23491 /* Helper table for figuring out how many insns an IT insn
23492 conditionalises.
23494 An ITxyz instruction of the format "1011 1111 firstcond mask"
23495 conditionalises some number of instructions, as indicated by the
23496 following table. A value of zero indicates the instruction is
23497 invalid in some way.
23499 mask = 0 means this isn't an IT instruction
23500 fc = 15 (NV) means unpredictable
23502 The line fc = 14 (AL) is different from the others; there are
23503 additional constraints in this case.
23505 mask(0 .. 15)
23506 +--------------------------------
23507 fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23508 .. | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23509 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23510 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23511 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23512 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23513 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23514 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23515 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23516 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23517 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23518 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23519 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23520 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23521 | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
23522 15) | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
23524 To be conservative with the analysis, let's rule out the mask = 0
23525 case, since that isn't an IT insn at all. But for all the other
23526 cases where the table contains zero, that means unpredictable, so
23527 let's say 4 to be conservative. Hence we have a safe value for any
23528 IT (mask,fc) pair that the CPU would actually identify as an IT
23529 instruction. The final table is
23531 mask(0 .. 15)
23532 +--------------------------------
23533 fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23534 .. | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23535 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23536 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23537 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23538 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23539 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23540 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23541 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23542 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23543 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23544 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23545 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23546 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23547 | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
23548 15) | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
23550 static const UChar it_length_table[256]
23551 = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23552 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23553 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23554 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23555 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23556 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23557 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23558 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23559 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23560 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23561 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23562 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23563 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23564 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23565 0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
23566 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
23570 /*------------------------------------------------------------*/
23571 /*--- Top-level fn ---*/
23572 /*------------------------------------------------------------*/
23574 /* Disassemble a single instruction into IR. The instruction
23575 is located in host memory at &guest_code[delta]. */
23577 DisResult disInstr_ARM ( IRSB* irsb_IN,
23578 const UChar* guest_code_IN,
23579 Long delta_ENCODED,
23580 Addr guest_IP_ENCODED,
23581 VexArch guest_arch,
23582 const VexArchInfo* archinfo,
23583 const VexAbiInfo* abiinfo,
23584 VexEndness host_endness_IN,
23585 Bool sigill_diag_IN )
23587 DisResult dres;
23588 Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
23590 /* Set globals (see top of this file) */
23591 vassert(guest_arch == VexArchARM);
23593 irsb = irsb_IN;
23594 host_endness = host_endness_IN;
23595 __curr_is_Thumb = isThumb;
23597 if (isThumb) {
23598 guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
23599 } else {
23600 guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
23603 if (isThumb) {
23604 dres = disInstr_THUMB_WRK ( &guest_code_IN[delta_ENCODED - 1],
23605 archinfo, abiinfo, sigill_diag_IN );
23606 } else {
23607 dres = disInstr_ARM_WRK ( &guest_code_IN[delta_ENCODED],
23608 archinfo, abiinfo, sigill_diag_IN );
23611 return dres;
23614 /* Test program for the conversion of IRCmpF64Result values to VFP
23615 nzcv values. See handling of FCMPD et al above. */
23617 UInt foo ( UInt x )
23619 UInt ix = ((x >> 5) & 3) | (x & 1);
23620 UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
23621 UInt termR = (ix & (ix >> 1) & 1);
23622 return termL - termR;
23625 void try ( char* s, UInt ir, UInt req )
23627 UInt act = foo(ir);
23628 printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
23629 s, ir, (req >> 3) & 1, (req >> 2) & 1,
23630 (req >> 1) & 1, (req >> 0) & 1,
23631 (act >> 3) & 1, (act >> 2) & 1,
23632 (act >> 1) & 1, (act >> 0) & 1, act);
/* Driver: exercise foo() on the four IRCmpF64Result outcomes and
   show required vs. actual nzcv bit patterns.  The req arguments
   are 4-bit nzcv values; hex literals are used because 0b binary
   literals are a GCC extension (standard only from C23). */
int main ( void )
{
   printf("\n");
   try("UN", 0x45, 0x3 /* 0b0011 */);
   try("LT", 0x01, 0x8 /* 0b1000 */);
   try("GT", 0x00, 0x2 /* 0b0010 */);
   try("EQ", 0x40, 0x6 /* 0b0110 */);
   printf("\n");
   return 0;
}
23648 /* Spare code for doing reference implementations of various 64-bit
23649 SIMD interleaves/deinterleaves/concatenation ops. */
23651 // Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
23652 // the top halves guaranteed to be zero.
23653 static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
23654 IRTemp* out0, IRTemp v64 )
23656 if (out3) *out3 = newTemp(Ity_I32);
23657 if (out2) *out2 = newTemp(Ity_I32);
23658 if (out1) *out1 = newTemp(Ity_I32);
23659 if (out0) *out0 = newTemp(Ity_I32);
23660 IRTemp hi32 = newTemp(Ity_I32);
23661 IRTemp lo32 = newTemp(Ity_I32);
23662 assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
23663 assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
23664 if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
23665 if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
23666 if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
23667 if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
23670 // Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
23671 // IRTemp.
23672 static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
23674 IRTemp hi32 = newTemp(Ity_I32);
23675 IRTemp lo32 = newTemp(Ity_I32);
23676 assign(hi32,
23677 binop(Iop_Or32,
23678 binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
23679 binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
23680 assign(lo32,
23681 binop(Iop_Or32,
23682 binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
23683 binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
23684 IRTemp res = newTemp(Ity_I64);
23685 assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
23686 return res;
23689 static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
23691 // returns a1 b1 a0 b0
23692 IRTemp a1, a0, b1, b0;
23693 break64to16s(NULL, NULL, &a1, &a0, a3210);
23694 break64to16s(NULL, NULL, &b1, &b0, b3210);
23695 return mkexpr(mk64from16s(a1, b1, a0, b0));
23698 static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
23700 // returns a3 b3 a2 b2
23701 IRTemp a3, a2, b3, b2;
23702 break64to16s(&a3, &a2, NULL, NULL, a3210);
23703 break64to16s(&b3, &b2, NULL, NULL, b3210);
23704 return mkexpr(mk64from16s(a3, b3, a2, b2));
23707 static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23709 // returns a2 a0 b2 b0
23710 IRTemp a2, a0, b2, b0;
23711 break64to16s(NULL, &a2, NULL, &a0, a3210);
23712 break64to16s(NULL, &b2, NULL, &b0, b3210);
23713 return mkexpr(mk64from16s(a2, a0, b2, b0));
23716 static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23718 // returns a3 a1 b3 b1
23719 IRTemp a3, a1, b3, b1;
23720 break64to16s(&a3, NULL, &a1, NULL, a3210);
23721 break64to16s(&b3, NULL, &b1, NULL, b3210);
23722 return mkexpr(mk64from16s(a3, a1, b3, b1));
23725 static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23727 // returns a3 b3 a1 b1
23728 IRTemp a3, b3, a1, b1;
23729 break64to16s(&a3, NULL, &a1, NULL, a3210);
23730 break64to16s(&b3, NULL, &b1, NULL, b3210);
23731 return mkexpr(mk64from16s(a3, b3, a1, b1));
23734 static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23736 // returns a2 b2 a0 b0
23737 IRTemp a2, b2, a0, b0;
23738 break64to16s(NULL, &a2, NULL, &a0, a3210);
23739 break64to16s(NULL, &b2, NULL, &b0, b3210);
23740 return mkexpr(mk64from16s(a2, b2, a0, b0));
23743 static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
23744 IRTemp* out4, IRTemp* out3, IRTemp* out2,
23745 IRTemp* out1,IRTemp* out0, IRTemp v64 )
23747 if (out7) *out7 = newTemp(Ity_I32);
23748 if (out6) *out6 = newTemp(Ity_I32);
23749 if (out5) *out5 = newTemp(Ity_I32);
23750 if (out4) *out4 = newTemp(Ity_I32);
23751 if (out3) *out3 = newTemp(Ity_I32);
23752 if (out2) *out2 = newTemp(Ity_I32);
23753 if (out1) *out1 = newTemp(Ity_I32);
23754 if (out0) *out0 = newTemp(Ity_I32);
23755 IRTemp hi32 = newTemp(Ity_I32);
23756 IRTemp lo32 = newTemp(Ity_I32);
23757 assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
23758 assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
23759 if (out7)
23760 assign(*out7, binop(Iop_And32,
23761 binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
23762 mkU32(0xFF)));
23763 if (out6)
23764 assign(*out6, binop(Iop_And32,
23765 binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
23766 mkU32(0xFF)));
23767 if (out5)
23768 assign(*out5, binop(Iop_And32,
23769 binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
23770 mkU32(0xFF)));
23771 if (out4)
23772 assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
23773 if (out3)
23774 assign(*out3, binop(Iop_And32,
23775 binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
23776 mkU32(0xFF)));
23777 if (out2)
23778 assign(*out2, binop(Iop_And32,
23779 binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
23780 mkU32(0xFF)));
23781 if (out1)
23782 assign(*out1, binop(Iop_And32,
23783 binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
23784 mkU32(0xFF)));
23785 if (out0)
23786 assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
23789 static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
23790 IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
23792 IRTemp hi32 = newTemp(Ity_I32);
23793 IRTemp lo32 = newTemp(Ity_I32);
23794 assign(hi32,
23795 binop(Iop_Or32,
23796 binop(Iop_Or32,
23797 binop(Iop_Shl32,
23798 binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
23799 mkU8(24)),
23800 binop(Iop_Shl32,
23801 binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
23802 mkU8(16))),
23803 binop(Iop_Or32,
23804 binop(Iop_Shl32,
23805 binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
23806 binop(Iop_And32,
23807 mkexpr(in4), mkU32(0xFF)))));
23808 assign(lo32,
23809 binop(Iop_Or32,
23810 binop(Iop_Or32,
23811 binop(Iop_Shl32,
23812 binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
23813 mkU8(24)),
23814 binop(Iop_Shl32,
23815 binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
23816 mkU8(16))),
23817 binop(Iop_Or32,
23818 binop(Iop_Shl32,
23819 binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
23820 binop(Iop_And32,
23821 mkexpr(in0), mkU32(0xFF)))));
23822 IRTemp res = newTemp(Ity_I64);
23823 assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
23824 return res;
23827 static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
23829 // returns a3 b3 a2 b2 a1 b1 a0 b0
23830 IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
23831 break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
23832 break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
23833 return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
23836 static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
23838 // returns a7 b7 a6 b6 a5 b5 a4 b4
23839 IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
23840 break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
23841 break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
23842 return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
23845 static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23847 // returns a6 a4 a2 a0 b6 b4 b2 b0
23848 IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
23849 break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
23850 break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
23851 return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
23854 static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23856 // returns a7 a5 a3 a1 b7 b5 b3 b1
23857 IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
23858 break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
23859 break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
23860 return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
23863 static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23865 // returns a6 b6 a4 b4 a2 b2 a0 b0
23866 IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
23867 break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
23868 break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
23869 return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
23872 static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23874 // returns a7 b7 a5 b5 a3 b3 a1 b1
23875 IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
23876 break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
23877 break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
23878 return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
23881 static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
23883 // returns a0 b0
23884 return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
23885 unop(Iop_64to32, mkexpr(b10)));
23888 static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
23890 // returns a1 b1
23891 return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
23892 unop(Iop_64HIto32, mkexpr(b10)));
23896 /*--------------------------------------------------------------------*/
23897 /*--- end guest_arm_toIR.c ---*/
23898 /*--------------------------------------------------------------------*/