VEX/priv/guest_arm_toIR.c

   1
   2 /*--------------------------------------------------------------------*/
   3 /*--- begin                                       guest_arm_toIR.c ---*/
   4 /*--------------------------------------------------------------------*/
   5
   6 /*
   7    This file is part of Valgrind, a dynamic binary instrumentation
   8    framework.
   9
  10    Copyright (C) 2004-2017 OpenWorks LLP
  11       info@open-works.net
  12
  13    NEON support is
  14    Copyright (C) 2010-2017 Samsung Electronics
  15    contributed by Dmitry Zhurikhin <zhur@ispras.ru>
  16               and Kirill Batuzov <batuzovk@ispras.ru>
  17
  18    This program is free software; you can redistribute it and/or
  19    modify it under the terms of the GNU General Public License as
  20    published by the Free Software Foundation; either version 2 of the
  21    License, or (at your option) any later version.
  22
  23    This program is distributed in the hope that it will be useful, but
  24    WITHOUT ANY WARRANTY; without even the implied warranty of
  25    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  26    General Public License for more details.
  27
  28    You should have received a copy of the GNU General Public License
  29    along with this program; if not, write to the Free Software
  30    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  31    02110-1301, USA.
  32
  33    The GNU General Public License is contained in the file COPYING.
  34 */
  35
  36 /* XXXX thumb to check:
  37    that all cases where putIRegT writes r15, we generate a jump.
  38
  39    All uses of newTemp assign to an IRTemp and not a UInt
  40
  41    For all thumb loads and stores, including VFP ones, new-ITSTATE is
  42    backed out before the memory op, and restored afterwards.  This
  43    needs to happen even after we go uncond.  (and for sure it doesn't
  44    happen for VFP loads/stores right now).
  45
  46    VFP on thumb: check that we exclude all r13/r15 cases that we
  47    should.
  48
  49    XXXX thumb to do: improve the ITSTATE-zeroing optimisation by
  50    taking into account the number of insns guarded by an IT.
  51
  52    remove the nasty hack, in the spechelper, of looking for Or32(...,
  53    0xE0) in as the first arg to armg_calculate_condition, and instead
  54    use Slice44 as specified in comments in the spechelper.
  55
  56    add specialisations for armg_calculate_flag_c and _v, as they
  57    are moderately often needed in Thumb code.
  58
  59    Correctness: ITSTATE handling in Thumb SVCs is wrong.
  60
  61    Correctness (obscure): in m_transtab, when invalidating code
  62    address ranges, invalidate up to 18 bytes after the end of the
  63    range.  This is because the ITSTATE optimisation at the top of
  64    _THUMB_WRK below analyses up to 18 bytes before the start of any
  65    given instruction, and so might depend on the invalidated area.
  66 */
  67
  68 /* Limitations, etc
  69
  70    - pretty dodgy exception semantics for {LD,ST}Mxx and {LD,ST}RD.
  71      These instructions are non-restartable in the case where the
  72      transfer(s) fault.
  73
  74    - SWP: the restart jump back is Ijk_Boring; it should be
  75      Ijk_NoRedir but that's expensive.  See comments on casLE() in
  76      guest_x86_toIR.c.
  77 */
  78
  79 /* "Special" instructions.
  80
  81    This instruction decoder can decode four special instructions
  82    which mean nothing natively (are no-ops as far as regs/mem are
  83    concerned) but have meaning for supporting Valgrind.  A special
  84    instruction is flagged by a 16-byte preamble:
  85
  86       E1A0C1EC E1A0C6EC E1A0CEEC E1A0C9EC
  87       (mov r12, r12, ROR #3;   mov r12, r12, ROR #13;
  88        mov r12, r12, ROR #29;  mov r12, r12, ROR #19)
  89
  90    Following that, one of the following 3 are allowed
  91    (standard interpretation in parentheses):
  92
  93       E18AA00A (orr r10,r10,r10)   R3 = client_request ( R4 )
  94       E18BB00B (orr r11,r11,r11)   R3 = guest_NRADDR
  95       E18CC00C (orr r12,r12,r12)   branch-and-link-to-noredir R4
  96       E1899009 (orr r9,r9,r9)      IR injection
  97
  98    Any other bytes following the 16-byte preamble are illegal and
  99    constitute a failure in instruction decoding.  This all assumes
 100    that the preamble will never occur except in specific code
 101    fragments designed for Valgrind to catch.
 102 */
 103
 104 /* Translates ARM(v5) code to IR. */
 105
 106 #include "libvex_basictypes.h"
 107 #include "libvex_ir.h"
 108 #include "libvex.h"
 109 #include "libvex_guest_arm.h"
 110
 111 #include "main_util.h"
 112 #include "main_globals.h"
 113 #include "guest_generic_bb_to_IR.h"
 114 #include "guest_arm_defs.h"
 115
 116
 117 /*------------------------------------------------------------*/
 118 /*--- Globals                                              ---*/
 119 /*------------------------------------------------------------*/
 120
 121 /* These are set at the start of the translation of a instruction, so
 122    that we don't have to pass them around endlessly.  CONST means does
 123    not change during translation of the instruction.
 124 */
 125
 126 /* CONST: what is the host's endianness?  This has to do with float vs
 127    double register accesses on VFP, but it's complex and not properly
 128    thought out. */
 129 static VexEndness host_endness;
 130
 131 /* CONST: The guest address for the instruction currently being
 132    translated.  This is the real, "decoded" address (not subject
 133    to the CPSR.T kludge). */
 134 static Addr32 guest_R15_curr_instr_notENC;
 135
 136 /* CONST, FOR ASSERTIONS ONLY.  Indicates whether currently processed
 137    insn is Thumb (True) or ARM (False). */
 138 static Bool __curr_is_Thumb;
 139
 140 /* MOD: The IRSB* into which we're generating code. */
 141 static IRSB* irsb;
 142
 143 /* These are to do with handling writes to r15.  They are initially
 144    set at the start of disInstr_ARM_WRK to indicate no update,
 145    possibly updated during the routine, and examined again at the end.
 146    If they have been set to indicate a r15 update then a jump is
 147    generated.  Note, "explicit" jumps (b, bx, etc) are generated
 148    directly, not using this mechanism -- this is intended to handle
 149    the implicit-style jumps resulting from (eg) assigning to r15 as
 150    the result of insns we wouldn't normally consider branchy. */
 151
 152 /* MOD.  Initially False; set to True iff abovementioned handling is
 153    required. */
 154 static Bool r15written;
 155
 156 /* MOD.  Initially IRTemp_INVALID.  If the r15 branch to be generated
 157    is conditional, this holds the gating IRTemp :: Ity_I32.  If the
 158    branch to be generated is unconditional, this remains
 159    IRTemp_INVALID. */
 160 static IRTemp r15guard; /* :: Ity_I32, 0 or 1 */
 161
 162 /* MOD.  Initially Ijk_Boring.  If an r15 branch is to be generated,
 163    this holds the jump kind. */
 164 static IRTemp r15kind;
 165
 166
 167 /*------------------------------------------------------------*/
 168 /*--- Debugging output                                     ---*/
 169 /*------------------------------------------------------------*/
 170
 171 #define DIP(format, args...)           \
 172    if (vex_traceflags & VEX_TRACE_FE)  \
 173       vex_printf(format, ## args)
 174
 175 #define DIS(buf, format, args...)      \
 176    if (vex_traceflags & VEX_TRACE_FE)  \
 177       vex_sprintf(buf, format, ## args)
 178
 179 #define ASSERT_IS_THUMB \
 180    do { vassert(__curr_is_Thumb); } while (0)
 181
 182 #define ASSERT_IS_ARM \
 183    do { vassert(! __curr_is_Thumb); } while (0)
 184
 185
 186 /*------------------------------------------------------------*/
 187 /*--- Helper bits and pieces for deconstructing the        ---*/
 188 /*--- arm insn stream.                                     ---*/
 189 /*------------------------------------------------------------*/
 190
 191 /* Do a little-endian load of a 32-bit word, regardless of the
 192    endianness of the underlying host. */
 193 static inline UInt getUIntLittleEndianly ( const UChar* p )
 194 {
 195    UInt w = 0;
 196    w = (w << 8) | p[3];
 197    w = (w << 8) | p[2];
 198    w = (w << 8) | p[1];
 199    w = (w << 8) | p[0];
 200    return w;
 201 }
 202
 203 /* Do a little-endian load of a 16-bit word, regardless of the
 204    endianness of the underlying host. */
 205 static inline UShort getUShortLittleEndianly ( const UChar* p )
 206 {
 207    UShort w = 0;
 208    w = (w << 8) | p[1];
 209    w = (w << 8) | p[0];
 210    return w;
 211 }
 212
 213 static UInt ROR32 ( UInt x, UInt sh ) {
 214    vassert(sh >= 0 && sh < 32);
 215    if (sh == 0)
 216       return x;
 217    else
 218       return (x << (32-sh)) | (x >> sh);
 219 }
 220
 221 static Int popcount32 ( UInt x )
 222 {
 223    Int res = 0, i;
 224    for (i = 0; i < 32; i++) {
 225       res += (x & 1);
 226       x >>= 1;
 227    }
 228    return res;
 229 }
 230
 231 static UInt setbit32 ( UInt x, Int ix, UInt b )
 232 {
 233    UInt mask = 1 << ix;
 234    x &= ~mask;
 235    x |= ((b << ix) & mask);
 236    return x;
 237 }
 238
 239 #define BITS2(_b1,_b0) \
 240    (((_b1) << 1) | (_b0))
 241
 242 #define BITS3(_b2,_b1,_b0)                      \
 243   (((_b2) << 2) | ((_b1) << 1) | (_b0))
 244
 245 #define BITS4(_b3,_b2,_b1,_b0) \
 246    (((_b3) << 3) | ((_b2) << 2) | ((_b1) << 1) | (_b0))
 247
 248 #define BITS8(_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
 249    ((BITS4((_b7),(_b6),(_b5),(_b4)) << 4) \
 250     | BITS4((_b3),(_b2),(_b1),(_b0)))
 251
 252 #define BITS5(_b4,_b3,_b2,_b1,_b0)  \
 253    (BITS8(0,0,0,(_b4),(_b3),(_b2),(_b1),(_b0)))
 254 #define BITS6(_b5,_b4,_b3,_b2,_b1,_b0)  \
 255    (BITS8(0,0,(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
 256 #define BITS7(_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
 257    (BITS8(0,(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
 258
 259 #define BITS9(_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)      \
 260    (((_b8) << 8) \
 261     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
 262
 263 #define BITS10(_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
 264    (((_b9) << 9) | ((_b8) << 8)                                \
 265     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
 266
 267 #define BITS11(_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
 268    ( ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8)              \
 269     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
 270
 271 #define BITS12(_b11,_b10,_b9,_b8,_b7,_b6,_b5,_b4,_b3,_b2,_b1,_b0)  \
 272    ( ((_b11) << 11) | ((_b10) << 10) | ((_b9) << 9) | ((_b8) << 8) \
 273     | BITS8((_b7),(_b6),(_b5),(_b4),(_b3),(_b2),(_b1),(_b0)))
 274
 275 /* produces _uint[_bMax:_bMin] */
 276 #define SLICE_UInt(_uint,_bMax,_bMin) \
 277    (( ((UInt)(_uint)) >> (_bMin)) \
 278     & (UInt)((1ULL << ((_bMax) - (_bMin) + 1)) - 1ULL))
 279
 280
 281 /*------------------------------------------------------------*/
 282 /*--- Helper bits and pieces for creating IR fragments.    ---*/
 283 /*------------------------------------------------------------*/
 284
 285 static IRExpr* mkU64 ( ULong i )
 286 {
 287    return IRExpr_Const(IRConst_U64(i));
 288 }
 289
 290 static IRExpr* mkU32 ( UInt i )
 291 {
 292    return IRExpr_Const(IRConst_U32(i));
 293 }
 294
 295 static IRExpr* mkU8 ( UInt i )
 296 {
 297    vassert(i < 256);
 298    return IRExpr_Const(IRConst_U8( (UChar)i ));
 299 }
 300
 301 static IRExpr* mkexpr ( IRTemp tmp )
 302 {
 303    return IRExpr_RdTmp(tmp);
 304 }
 305
 306 static IRExpr* unop ( IROp op, IRExpr* a )
 307 {
 308    return IRExpr_Unop(op, a);
 309 }
 310
 311 static IRExpr* binop ( IROp op, IRExpr* a1, IRExpr* a2 )
 312 {
 313    return IRExpr_Binop(op, a1, a2);
 314 }
 315
 316 static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
 317 {
 318    return IRExpr_Triop(op, a1, a2, a3);
 319 }
 320
 321 static IRExpr* loadLE ( IRType ty, IRExpr* addr )
 322 {
 323    return IRExpr_Load(Iend_LE, ty, addr);
 324 }
 325
 326 /* Add a statement to the list held by "irbb". */
 327 static void stmt ( IRStmt* st )
 328 {
 329    addStmtToIRSB( irsb, st );
 330 }
 331
 332 static void assign ( IRTemp dst, IRExpr* e )
 333 {
 334    stmt( IRStmt_WrTmp(dst, e) );
 335 }
 336
 337 static void storeLE ( IRExpr* addr, IRExpr* data )
 338 {
 339    stmt( IRStmt_Store(Iend_LE, addr, data) );
 340 }
 341
 342 static void storeGuardedLE ( IRExpr* addr, IRExpr* data, IRTemp guardT )
 343 {
 344    if (guardT == IRTemp_INVALID) {
 345       /* unconditional */
 346       storeLE(addr, data);
 347    } else {
 348       stmt( IRStmt_StoreG(Iend_LE, addr, data,
 349                           binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
 350    }
 351 }
 352
 353 static void loadGuardedLE ( IRTemp dst, IRLoadGOp cvt,
 354                             IRExpr* addr, IRExpr* alt,
 355                             IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
 356 {
 357    if (guardT == IRTemp_INVALID) {
 358       /* unconditional */
 359       IRExpr* loaded = NULL;
 360       switch (cvt) {
 361          case ILGop_Ident32:
 362             loaded = loadLE(Ity_I32, addr); break;
 363          case ILGop_8Uto32:
 364             loaded = unop(Iop_8Uto32, loadLE(Ity_I8, addr)); break;
 365          case ILGop_8Sto32:
 366             loaded = unop(Iop_8Sto32, loadLE(Ity_I8, addr)); break;
 367          case ILGop_16Uto32:
 368             loaded = unop(Iop_16Uto32, loadLE(Ity_I16, addr)); break;
 369          case ILGop_16Sto32:
 370             loaded = unop(Iop_16Sto32, loadLE(Ity_I16, addr)); break;
 371          default:
 372             vassert(0);
 373       }
 374       vassert(loaded != NULL);
 375       assign(dst, loaded);
 376    } else {
 377       /* Generate a guarded load into 'dst', but apply 'cvt' to the
 378          loaded data before putting the data in 'dst'.  If the load
 379          does not take place, 'alt' is placed directly in 'dst'. */
 380       stmt( IRStmt_LoadG(Iend_LE, cvt, dst, addr, alt,
 381                          binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0))) );
 382    }
 383 }
 384
 385 /* Generate a new temporary of the given type. */
 386 static IRTemp newTemp ( IRType ty )
 387 {
 388    vassert(isPlausibleIRType(ty));
 389    return newIRTemp( irsb->tyenv, ty );
 390 }
 391
 392 /* Produces a value in 0 .. 3, which is encoded as per the type
 393    IRRoundingMode. */
 394 static IRExpr* /* :: Ity_I32 */ get_FAKE_roundingmode ( void )
 395 {
 396    return mkU32(Irrm_NEAREST);
 397 }
 398
 399 /* Generate an expression for SRC rotated right by ROT. */
 400 static IRExpr* genROR32( IRTemp src, Int rot )
 401 {
 402    vassert(rot >= 0 && rot < 32);
 403    if (rot == 0)
 404       return mkexpr(src);
 405    return
 406       binop(Iop_Or32,
 407             binop(Iop_Shl32, mkexpr(src), mkU8(32 - rot)),
 408             binop(Iop_Shr32, mkexpr(src), mkU8(rot)));
 409 }
 410
 411 static IRExpr* mkU128 ( ULong i )
 412 {
 413    return binop(Iop_64HLtoV128, mkU64(i), mkU64(i));
 414 }
 415
 416 /* Generate a 4-aligned version of the given expression if
 417    the given condition is true.  Else return it unchanged. */
 418 static IRExpr* align4if ( IRExpr* e, Bool b )
 419 {
 420    if (b)
 421       return binop(Iop_And32, e, mkU32(~3));
 422    else
 423       return e;
 424 }
 425
 426
 427 /*------------------------------------------------------------*/
 428 /*--- Helpers for accessing guest registers.               ---*/
 429 /*------------------------------------------------------------*/
 430
 431 #define OFFB_R0       offsetof(VexGuestARMState,guest_R0)
 432 #define OFFB_R1       offsetof(VexGuestARMState,guest_R1)
 433 #define OFFB_R2       offsetof(VexGuestARMState,guest_R2)
 434 #define OFFB_R3       offsetof(VexGuestARMState,guest_R3)
 435 #define OFFB_R4       offsetof(VexGuestARMState,guest_R4)
 436 #define OFFB_R5       offsetof(VexGuestARMState,guest_R5)
 437 #define OFFB_R6       offsetof(VexGuestARMState,guest_R6)
 438 #define OFFB_R7       offsetof(VexGuestARMState,guest_R7)
 439 #define OFFB_R8       offsetof(VexGuestARMState,guest_R8)
 440 #define OFFB_R9       offsetof(VexGuestARMState,guest_R9)
 441 #define OFFB_R10      offsetof(VexGuestARMState,guest_R10)
 442 #define OFFB_R11      offsetof(VexGuestARMState,guest_R11)
 443 #define OFFB_R12      offsetof(VexGuestARMState,guest_R12)
 444 #define OFFB_R13      offsetof(VexGuestARMState,guest_R13)
 445 #define OFFB_R14      offsetof(VexGuestARMState,guest_R14)
 446 #define OFFB_R15T     offsetof(VexGuestARMState,guest_R15T)
 447
 448 #define OFFB_CC_OP    offsetof(VexGuestARMState,guest_CC_OP)
 449 #define OFFB_CC_DEP1  offsetof(VexGuestARMState,guest_CC_DEP1)
 450 #define OFFB_CC_DEP2  offsetof(VexGuestARMState,guest_CC_DEP2)
 451 #define OFFB_CC_NDEP  offsetof(VexGuestARMState,guest_CC_NDEP)
 452 #define OFFB_NRADDR   offsetof(VexGuestARMState,guest_NRADDR)
 453
 454 #define OFFB_D0       offsetof(VexGuestARMState,guest_D0)
 455 #define OFFB_D1       offsetof(VexGuestARMState,guest_D1)
 456 #define OFFB_D2       offsetof(VexGuestARMState,guest_D2)
 457 #define OFFB_D3       offsetof(VexGuestARMState,guest_D3)
 458 #define OFFB_D4       offsetof(VexGuestARMState,guest_D4)
 459 #define OFFB_D5       offsetof(VexGuestARMState,guest_D5)
 460 #define OFFB_D6       offsetof(VexGuestARMState,guest_D6)
 461 #define OFFB_D7       offsetof(VexGuestARMState,guest_D7)
 462 #define OFFB_D8       offsetof(VexGuestARMState,guest_D8)
 463 #define OFFB_D9       offsetof(VexGuestARMState,guest_D9)
 464 #define OFFB_D10      offsetof(VexGuestARMState,guest_D10)
 465 #define OFFB_D11      offsetof(VexGuestARMState,guest_D11)
 466 #define OFFB_D12      offsetof(VexGuestARMState,guest_D12)
 467 #define OFFB_D13      offsetof(VexGuestARMState,guest_D13)
 468 #define OFFB_D14      offsetof(VexGuestARMState,guest_D14)
 469 #define OFFB_D15      offsetof(VexGuestARMState,guest_D15)
 470 #define OFFB_D16      offsetof(VexGuestARMState,guest_D16)
 471 #define OFFB_D17      offsetof(VexGuestARMState,guest_D17)
 472 #define OFFB_D18      offsetof(VexGuestARMState,guest_D18)
 473 #define OFFB_D19      offsetof(VexGuestARMState,guest_D19)
 474 #define OFFB_D20      offsetof(VexGuestARMState,guest_D20)
 475 #define OFFB_D21      offsetof(VexGuestARMState,guest_D21)
 476 #define OFFB_D22      offsetof(VexGuestARMState,guest_D22)
 477 #define OFFB_D23      offsetof(VexGuestARMState,guest_D23)
 478 #define OFFB_D24      offsetof(VexGuestARMState,guest_D24)
 479 #define OFFB_D25      offsetof(VexGuestARMState,guest_D25)
 480 #define OFFB_D26      offsetof(VexGuestARMState,guest_D26)
 481 #define OFFB_D27      offsetof(VexGuestARMState,guest_D27)
 482 #define OFFB_D28      offsetof(VexGuestARMState,guest_D28)
 483 #define OFFB_D29      offsetof(VexGuestARMState,guest_D29)
 484 #define OFFB_D30      offsetof(VexGuestARMState,guest_D30)
 485 #define OFFB_D31      offsetof(VexGuestARMState,guest_D31)
 486
 487 #define OFFB_FPSCR    offsetof(VexGuestARMState,guest_FPSCR)
 488 #define OFFB_TPIDRURO offsetof(VexGuestARMState,guest_TPIDRURO)
 489 #define OFFB_TPIDRURW offsetof(VexGuestARMState,guest_TPIDRURW)
 490 #define OFFB_ITSTATE  offsetof(VexGuestARMState,guest_ITSTATE)
 491 #define OFFB_QFLAG32  offsetof(VexGuestARMState,guest_QFLAG32)
 492 #define OFFB_GEFLAG0  offsetof(VexGuestARMState,guest_GEFLAG0)
 493 #define OFFB_GEFLAG1  offsetof(VexGuestARMState,guest_GEFLAG1)
 494 #define OFFB_GEFLAG2  offsetof(VexGuestARMState,guest_GEFLAG2)
 495 #define OFFB_GEFLAG3  offsetof(VexGuestARMState,guest_GEFLAG3)
 496
 497 #define OFFB_CMSTART  offsetof(VexGuestARMState,guest_CMSTART)
 498 #define OFFB_CMLEN    offsetof(VexGuestARMState,guest_CMLEN)
 499
 500
 501 /* ---------------- Integer registers ---------------- */
 502
 503 static Int integerGuestRegOffset ( UInt iregNo )
 504 {
 505    /* Do we care about endianness here?  We do if sub-parts of integer
 506       registers are accessed, but I don't think that ever happens on
 507       ARM. */
 508    switch (iregNo) {
 509       case 0:  return OFFB_R0;
 510       case 1:  return OFFB_R1;
 511       case 2:  return OFFB_R2;
 512       case 3:  return OFFB_R3;
 513       case 4:  return OFFB_R4;
 514       case 5:  return OFFB_R5;
 515       case 6:  return OFFB_R6;
 516       case 7:  return OFFB_R7;
 517       case 8:  return OFFB_R8;
 518       case 9:  return OFFB_R9;
 519       case 10: return OFFB_R10;
 520       case 11: return OFFB_R11;
 521       case 12: return OFFB_R12;
 522       case 13: return OFFB_R13;
 523       case 14: return OFFB_R14;
 524       case 15: return OFFB_R15T;
 525       default: vassert(0);
 526    }
 527 }
 528
 529 /* Plain ("low level") read from a reg; no +8 offset magic for r15. */
 530 static IRExpr* llGetIReg ( UInt iregNo )
 531 {
 532    vassert(iregNo < 16);
 533    return IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
 534 }
 535
 536 /* Architected read from a reg in ARM mode.  This automagically adds 8
 537    to all reads of r15. */
 538 static IRExpr* getIRegA ( UInt iregNo )
 539 {
 540    IRExpr* e;
 541    ASSERT_IS_ARM;
 542    vassert(iregNo < 16);
 543    if (iregNo == 15) {
 544       /* If asked for r15, don't read the guest state value, as that
 545          may not be up to date in the case where loop unrolling has
 546          happened, because the first insn's write to the block is
 547          omitted; hence in the 2nd and subsequent unrollings we don't
 548          have a correct value in guest r15.  Instead produce the
 549          constant that we know would be produced at this point. */
 550       vassert(0 == (guest_R15_curr_instr_notENC & 3));
 551       e = mkU32(guest_R15_curr_instr_notENC + 8);
 552    } else {
 553       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
 554    }
 555    return e;
 556 }
 557
 558 /* Architected read from a reg in Thumb mode.  This automagically adds
 559    4 to all reads of r15. */
 560 static IRExpr* getIRegT ( UInt iregNo )
 561 {
 562    IRExpr* e;
 563    ASSERT_IS_THUMB;
 564    vassert(iregNo < 16);
 565    if (iregNo == 15) {
 566       /* Ditto comment in getIReg. */
 567       vassert(0 == (guest_R15_curr_instr_notENC & 1));
 568       e = mkU32(guest_R15_curr_instr_notENC + 4);
 569    } else {
 570       e = IRExpr_Get( integerGuestRegOffset(iregNo), Ity_I32 );
 571    }
 572    return e;
 573 }
 574
 575 /* Plain ("low level") write to a reg; no jump or alignment magic for
 576    r15. */
 577 static void llPutIReg ( UInt iregNo, IRExpr* e )
 578 {
 579    vassert(iregNo < 16);
 580    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
 581    stmt( IRStmt_Put(integerGuestRegOffset(iregNo), e) );
 582 }
 583
 584 /* Architected write to an integer register in ARM mode.  If it is to
 585    r15, record info so at the end of this insn's translation, a branch
 586    to it can be made.  Also handles conditional writes to the
 587    register: if guardT == IRTemp_INVALID then the write is
 588    unconditional.  If writing r15, also 4-align it. */
 589 static void putIRegA ( UInt       iregNo,
 590                        IRExpr*    e,
 591                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */,
 592                        IRJumpKind jk /* if a jump is generated */ )
 593 {
 594    /* if writing r15, force e to be 4-aligned. */
 595    // INTERWORKING FIXME.  this needs to be relaxed so that
 596    // puts caused by LDMxx which load r15 interwork right.
 597    // but is no aligned too relaxed?
 598    //if (iregNo == 15)
 599    //   e = binop(Iop_And32, e, mkU32(~3));
 600    ASSERT_IS_ARM;
 601    /* So, generate either an unconditional or a conditional write to
 602       the reg. */
 603    if (guardT == IRTemp_INVALID) {
 604       /* unconditional write */
 605       llPutIReg( iregNo, e );
 606    } else {
 607       llPutIReg( iregNo,
 608                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
 609                              e, llGetIReg(iregNo) ));
 610    }
 611    if (iregNo == 15) {
 612       // assert against competing r15 updates.  Shouldn't
 613       // happen; should be ruled out by the instr matching
 614       // logic.
 615       vassert(r15written == False);
 616       vassert(r15guard   == IRTemp_INVALID);
 617       vassert(r15kind    == Ijk_Boring);
 618       r15written = True;
 619       r15guard   = guardT;
 620       r15kind    = jk;
 621    }
 622 }
 623
 624
 625 /* Architected write to an integer register in Thumb mode.  Writes to
 626    r15 are not allowed.  Handles conditional writes to the register:
 627    if guardT == IRTemp_INVALID then the write is unconditional. */
 628 static void putIRegT ( UInt       iregNo,
 629                        IRExpr*    e,
 630                        IRTemp     guardT /* :: Ity_I32, 0 or 1 */ )
 631 {
 632    /* So, generate either an unconditional or a conditional write to
 633       the reg. */
 634    ASSERT_IS_THUMB;
 635    vassert(iregNo >= 0 && iregNo <= 14);
 636    if (guardT == IRTemp_INVALID) {
 637       /* unconditional write */
 638       llPutIReg( iregNo, e );
 639    } else {
 640       llPutIReg( iregNo,
 641                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
 642                              e, llGetIReg(iregNo) ));
 643    }
 644 }
 645
 646
 647 /* Thumb16 and Thumb32 only.
 648    Returns true if reg is 13 or 15.  Implements the BadReg
 649    predicate in the ARM ARM. */
 650 static Bool isBadRegT ( UInt r )
 651 {
 652    vassert(r <= 15);
 653    ASSERT_IS_THUMB;
 654    return r == 13 || r == 15;
 655 }
 656
 657
 658 /* ---------------- Double registers ---------------- */
 659
 660 static Int doubleGuestRegOffset ( UInt dregNo )
 661 {
 662    /* Do we care about endianness here?  Probably do if we ever get
 663       into the situation of dealing with the single-precision VFP
 664       registers. */
 665    switch (dregNo) {
 666       case 0:  return OFFB_D0;
 667       case 1:  return OFFB_D1;
 668       case 2:  return OFFB_D2;
 669       case 3:  return OFFB_D3;
 670       case 4:  return OFFB_D4;
 671       case 5:  return OFFB_D5;
 672       case 6:  return OFFB_D6;
 673       case 7:  return OFFB_D7;
 674       case 8:  return OFFB_D8;
 675       case 9:  return OFFB_D9;
 676       case 10: return OFFB_D10;
 677       case 11: return OFFB_D11;
 678       case 12: return OFFB_D12;
 679       case 13: return OFFB_D13;
 680       case 14: return OFFB_D14;
 681       case 15: return OFFB_D15;
 682       case 16: return OFFB_D16;
 683       case 17: return OFFB_D17;
 684       case 18: return OFFB_D18;
 685       case 19: return OFFB_D19;
 686       case 20: return OFFB_D20;
 687       case 21: return OFFB_D21;
 688       case 22: return OFFB_D22;
 689       case 23: return OFFB_D23;
 690       case 24: return OFFB_D24;
 691       case 25: return OFFB_D25;
 692       case 26: return OFFB_D26;
 693       case 27: return OFFB_D27;
 694       case 28: return OFFB_D28;
 695       case 29: return OFFB_D29;
 696       case 30: return OFFB_D30;
 697       case 31: return OFFB_D31;
 698       default: vassert(0);
 699    }
 700 }
 701
 702 /* Plain ("low level") read from a VFP Dreg. */
 703 static IRExpr* llGetDReg ( UInt dregNo )
 704 {
 705    vassert(dregNo < 32);
 706    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_F64 );
 707 }
 708
 709 /* Architected read from a VFP Dreg. */
 710 static IRExpr* getDReg ( UInt dregNo ) {
 711    return llGetDReg( dregNo );
 712 }
 713
 714 /* Plain ("low level") write to a VFP Dreg. */
 715 static void llPutDReg ( UInt dregNo, IRExpr* e )
 716 {
 717    vassert(dregNo < 32);
 718    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F64);
 719    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
 720 }
 721
 722 /* Architected write to a VFP Dreg.  Handles conditional writes to the
 723    register: if guardT == IRTemp_INVALID then the write is
 724    unconditional. */
 725 static void putDReg ( UInt    dregNo,
 726                       IRExpr* e,
 727                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
 728 {
 729    /* So, generate either an unconditional or a conditional write to
 730       the reg. */
 731    if (guardT == IRTemp_INVALID) {
 732       /* unconditional write */
 733       llPutDReg( dregNo, e );
 734    } else {
 735       llPutDReg( dregNo,
 736                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
 737                              e, llGetDReg(dregNo) ));
 738    }
 739 }
 740
 741 /* And now exactly the same stuff all over again, but this time
 742    taking/returning I64 rather than F64, to support 64-bit Neon
 743    ops. */
 744
 745 /* Plain ("low level") read from a Neon Integer Dreg. */
 746 static IRExpr* llGetDRegI64 ( UInt dregNo )
 747 {
 748    vassert(dregNo < 32);
 749    return IRExpr_Get( doubleGuestRegOffset(dregNo), Ity_I64 );
 750 }
 751
 752 /* Architected read from a Neon Integer Dreg. */
 753 static IRExpr* getDRegI64 ( UInt dregNo ) {
 754    return llGetDRegI64( dregNo );
 755 }
 756
 757 /* Plain ("low level") write to a Neon Integer Dreg. */
 758 static void llPutDRegI64 ( UInt dregNo, IRExpr* e )
 759 {
 760    vassert(dregNo < 32);
 761    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I64);
 762    stmt( IRStmt_Put(doubleGuestRegOffset(dregNo), e) );
 763 }
 764
 765 /* Architected write to a Neon Integer Dreg.  Handles conditional
 766    writes to the register: if guardT == IRTemp_INVALID then the write
 767    is unconditional. */
 768 static void putDRegI64 ( UInt    dregNo,
 769                          IRExpr* e,
 770                          IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
 771 {
 772    /* So, generate either an unconditional or a conditional write to
 773       the reg. */
 774    if (guardT == IRTemp_INVALID) {
 775       /* unconditional write */
 776       llPutDRegI64( dregNo, e );
 777    } else {
 778       llPutDRegI64( dregNo,
 779                     IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
 780                                 e, llGetDRegI64(dregNo) ));
 781    }
 782 }
 783
 784 /* ---------------- Quad registers ---------------- */
 785
 786 static Int quadGuestRegOffset ( UInt qregNo )
 787 {
 788    /* Do we care about endianness here?  Probably do if we ever get
 789       into the situation of dealing with the 64 bit Neon registers. */
 790    switch (qregNo) {
 791       case 0:  return OFFB_D0;
 792       case 1:  return OFFB_D2;
 793       case 2:  return OFFB_D4;
 794       case 3:  return OFFB_D6;
 795       case 4:  return OFFB_D8;
 796       case 5:  return OFFB_D10;
 797       case 6:  return OFFB_D12;
 798       case 7:  return OFFB_D14;
 799       case 8:  return OFFB_D16;
 800       case 9:  return OFFB_D18;
 801       case 10: return OFFB_D20;
 802       case 11: return OFFB_D22;
 803       case 12: return OFFB_D24;
 804       case 13: return OFFB_D26;
 805       case 14: return OFFB_D28;
 806       case 15: return OFFB_D30;
 807       default: vassert(0);
 808    }
 809 }
 810
 811 /* Plain ("low level") read from a Neon Qreg. */
 812 static IRExpr* llGetQReg ( UInt qregNo )
 813 {
 814    vassert(qregNo < 16);
 815    return IRExpr_Get( quadGuestRegOffset(qregNo), Ity_V128 );
 816 }
 817
 818 /* Architected read from a Neon Qreg. */
 819 static IRExpr* getQReg ( UInt qregNo ) {
 820    return llGetQReg( qregNo );
 821 }
 822
 823 /* Plain ("low level") write to a Neon Qreg. */
 824 static void llPutQReg ( UInt qregNo, IRExpr* e )
 825 {
 826    vassert(qregNo < 16);
 827    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_V128);
 828    stmt( IRStmt_Put(quadGuestRegOffset(qregNo), e) );
 829 }
 830
 831 /* Architected write to a Neon Qreg.  Handles conditional writes to the
 832    register: if guardT == IRTemp_INVALID then the write is
 833    unconditional. */
 834 static void putQReg ( UInt    qregNo,
 835                       IRExpr* e,
 836                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
 837 {
 838    /* So, generate either an unconditional or a conditional write to
 839       the reg. */
 840    if (guardT == IRTemp_INVALID) {
 841       /* unconditional write */
 842       llPutQReg( qregNo, e );
 843    } else {
 844       llPutQReg( qregNo,
 845                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
 846                              e, llGetQReg(qregNo) ));
 847    }
 848 }
 849
 850
 851 /* ---------------- Float registers ---------------- */
 852
 853 static Int floatGuestRegOffset ( UInt fregNo )
 854 {
 855    /* Start with the offset of the containing double, and then correct
 856       for endianness.  Actually this is completely bogus and needs
 857       careful thought. */
 858    Int off;
 859    /* NB! Limit is 64, not 32, because we might be pulling F32 bits
 860       out of SIMD registers, and there are 16 SIMD registers each of
 861       128 bits (4 x F32). */
 862    vassert(fregNo < 64);
 863    off = doubleGuestRegOffset(fregNo >> 1);
 864    if (host_endness == VexEndnessLE) {
 865       if (fregNo & 1)
 866          off += 4;
 867    } else {
 868       vassert(0);
 869    }
 870    return off;
 871 }
 872
 873 /* Plain ("low level") read from a VFP Freg. */
 874 static IRExpr* llGetFReg ( UInt fregNo )
 875 {
 876    vassert(fregNo < 32);
 877    return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
 878 }
 879
 880 static IRExpr* llGetFReg_up_to_64 ( UInt fregNo )
 881 {
 882    vassert(fregNo < 64);
 883    return IRExpr_Get( floatGuestRegOffset(fregNo), Ity_F32 );
 884 }
 885
 886 /* Architected read from a VFP Freg. */
 887 static IRExpr* getFReg ( UInt fregNo ) {
 888    return llGetFReg( fregNo );
 889 }
 890
 891 /* Plain ("low level") write to a VFP Freg. */
 892 static void llPutFReg ( UInt fregNo, IRExpr* e )
 893 {
 894    vassert(fregNo < 32);
 895    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
 896    stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
 897 }
 898
 899 static void llPutFReg_up_to_64 ( UInt fregNo, IRExpr* e )
 900 {
 901    vassert(fregNo < 64);
 902    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_F32);
 903    stmt( IRStmt_Put(floatGuestRegOffset(fregNo), e) );
 904 }
 905
 906 /* Architected write to a VFP Freg.  Handles conditional writes to the
 907    register: if guardT == IRTemp_INVALID then the write is
 908    unconditional. */
 909 static void putFReg ( UInt    fregNo,
 910                       IRExpr* e,
 911                       IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
 912 {
 913    /* So, generate either an unconditional or a conditional write to
 914       the reg. */
 915    if (guardT == IRTemp_INVALID) {
 916       /* unconditional write */
 917       llPutFReg( fregNo, e );
 918    } else {
 919       llPutFReg( fregNo,
 920                  IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
 921                              e, llGetFReg(fregNo) ));
 922    }
 923 }
 924
 925
 926 /* ---------------- Misc registers ---------------- */
 927
 928 static void putMiscReg32 ( UInt    gsoffset,
 929                            IRExpr* e, /* :: Ity_I32 */
 930                            IRTemp  guardT /* :: Ity_I32, 0 or 1 */)
 931 {
 932    switch (gsoffset) {
 933       case OFFB_FPSCR:   break;
 934       case OFFB_QFLAG32: break;
 935       case OFFB_GEFLAG0: break;
 936       case OFFB_GEFLAG1: break;
 937       case OFFB_GEFLAG2: break;
 938       case OFFB_GEFLAG3: break;
 939       case OFFB_TPIDRURW: break;
 940       default: vassert(0); /* awaiting more cases */
 941    }
 942    vassert(typeOfIRExpr(irsb->tyenv, e) == Ity_I32);
 943
 944    if (guardT == IRTemp_INVALID) {
 945       /* unconditional write */
 946       stmt(IRStmt_Put(gsoffset, e));
 947    } else {
 948       stmt(IRStmt_Put(
 949          gsoffset,
 950          IRExpr_ITE( binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)),
 951                      e, IRExpr_Get(gsoffset, Ity_I32) )
 952       ));
 953    }
 954 }
 955
 956 static IRTemp get_ITSTATE ( void )
 957 {
 958    ASSERT_IS_THUMB;
 959    IRTemp t = newTemp(Ity_I32);
 960    assign(t, IRExpr_Get( OFFB_ITSTATE, Ity_I32));
 961    return t;
 962 }
 963
 964 static void put_ITSTATE ( IRTemp t )
 965 {
 966    ASSERT_IS_THUMB;
 967    stmt( IRStmt_Put( OFFB_ITSTATE, mkexpr(t)) );
 968 }
 969
 970 static IRTemp get_QFLAG32 ( void )
 971 {
 972    IRTemp t = newTemp(Ity_I32);
 973    assign(t, IRExpr_Get( OFFB_QFLAG32, Ity_I32));
 974    return t;
 975 }
 976
 977 static void put_QFLAG32 ( IRTemp t, IRTemp condT )
 978 {
 979    putMiscReg32( OFFB_QFLAG32, mkexpr(t), condT );
 980 }
 981
 982 /* Stickily set the 'Q' flag (APSR bit 27) of the APSR (Application Program
 983    Status Register) to indicate that overflow or saturation occurred.
 984    Nb: t must be zero to denote no saturation, and any nonzero
 985    value to indicate saturation. */
 986 static void or_into_QFLAG32 ( IRExpr* e, IRTemp condT )
 987 {
 988    IRTemp old = get_QFLAG32();
 989    IRTemp nyu = newTemp(Ity_I32);
 990    assign(nyu, binop(Iop_Or32, mkexpr(old), e) );
 991    put_QFLAG32(nyu, condT);
 992 }
 993
 994 /* Generate code to set APSR.GE[flagNo]. Each fn call sets 1 bit.
 995    flagNo: which flag bit to set [3...0]
 996    lowbits_to_ignore:  0 = look at all 32 bits
 997                        8 = look at top 24 bits only
 998                       16 = look at top 16 bits only
 999                       31 = look at the top bit only
1000    e: input value to be evaluated.
1001    The new value is taken from 'e' with the lowest 'lowbits_to_ignore'
1002    masked out.  If the resulting value is zero then the GE flag is
1003    set to 0; any other value sets the flag to 1. */
1004 static void put_GEFLAG32 ( Int flagNo,            /* 0, 1, 2 or 3 */
1005                            Int lowbits_to_ignore, /* 0, 8, 16 or 31   */
1006                            IRExpr* e,             /* Ity_I32 */
1007                            IRTemp condT )
1008 {
1009    vassert( flagNo >= 0 && flagNo <= 3 );
1010    vassert( lowbits_to_ignore == 0  ||
1011             lowbits_to_ignore == 8  ||
1012             lowbits_to_ignore == 16 ||
1013             lowbits_to_ignore == 31 );
1014    IRTemp masked = newTemp(Ity_I32);
1015    assign(masked, binop(Iop_Shr32, e, mkU8(lowbits_to_ignore)));
1016
1017    switch (flagNo) {
1018       case 0: putMiscReg32(OFFB_GEFLAG0, mkexpr(masked), condT); break;
1019       case 1: putMiscReg32(OFFB_GEFLAG1, mkexpr(masked), condT); break;
1020       case 2: putMiscReg32(OFFB_GEFLAG2, mkexpr(masked), condT); break;
1021       case 3: putMiscReg32(OFFB_GEFLAG3, mkexpr(masked), condT); break;
1022       default: vassert(0);
1023    }
1024 }
1025
1026 /* Return the (32-bit, zero-or-nonzero representation scheme) of
1027    the specified GE flag. */
1028 static IRExpr* get_GEFLAG32( Int flagNo /* 0, 1, 2, 3 */ )
1029 {
1030    switch (flagNo) {
1031       case 0: return IRExpr_Get( OFFB_GEFLAG0, Ity_I32 );
1032       case 1: return IRExpr_Get( OFFB_GEFLAG1, Ity_I32 );
1033       case 2: return IRExpr_Get( OFFB_GEFLAG2, Ity_I32 );
1034       case 3: return IRExpr_Get( OFFB_GEFLAG3, Ity_I32 );
1035       default: vassert(0);
1036    }
1037 }
1038
1039 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3 and
1040    2 are set from bit 31 of the value, and GE 1 and 0 are set from bit
1041    15 of the value.  All other bits are ignored. */
1042 static void set_GE_32_10_from_bits_31_15 ( IRTemp t32, IRTemp condT )
1043 {
1044    IRTemp ge10 = newTemp(Ity_I32);
1045    IRTemp ge32 = newTemp(Ity_I32);
1046    assign(ge10, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1047    assign(ge32, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1048    put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
1049    put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
1050    put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
1051    put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
1052 }
1053
1054
1055 /* Set all 4 GE flags from the given 32-bit value as follows: GE 3
1056    from bit 31, GE 2 from bit 23, GE 1 from bit 15, and GE0 from
1057    bit 7.  All other bits are ignored. */
1058 static void set_GE_3_2_1_0_from_bits_31_23_15_7 ( IRTemp t32, IRTemp condT )
1059 {
1060    IRTemp ge0 = newTemp(Ity_I32);
1061    IRTemp ge1 = newTemp(Ity_I32);
1062    IRTemp ge2 = newTemp(Ity_I32);
1063    IRTemp ge3 = newTemp(Ity_I32);
1064    assign(ge0, binop(Iop_And32, mkexpr(t32), mkU32(0x00000080)));
1065    assign(ge1, binop(Iop_And32, mkexpr(t32), mkU32(0x00008000)));
1066    assign(ge2, binop(Iop_And32, mkexpr(t32), mkU32(0x00800000)));
1067    assign(ge3, binop(Iop_And32, mkexpr(t32), mkU32(0x80000000)));
1068    put_GEFLAG32( 0, 0, mkexpr(ge0), condT );
1069    put_GEFLAG32( 1, 0, mkexpr(ge1), condT );
1070    put_GEFLAG32( 2, 0, mkexpr(ge2), condT );
1071    put_GEFLAG32( 3, 0, mkexpr(ge3), condT );
1072 }
1073
1074
1075 /* ---------------- FPSCR stuff ---------------- */
1076
1077 /* Generate IR to get hold of the rounding mode bits in FPSCR, and
1078    convert them to IR format.  Bind the final result to the
1079    returned temp. */
1080 static IRTemp /* :: Ity_I32 */ mk_get_IR_rounding_mode ( void )
1081 {
1082    /* The ARMvfp encoding for rounding mode bits is:
1083          00  to nearest
1084          01  to +infinity
1085          10  to -infinity
1086          11  to zero
1087       We need to convert that to the IR encoding:
1088          00  to nearest (the default)
1089          10  to +infinity
1090          01  to -infinity
1091          11  to zero
1092       Which can be done by swapping bits 0 and 1.
1093       The rmode bits are at 23:22 in FPSCR.
1094    */
1095    IRTemp armEncd = newTemp(Ity_I32);
1096    IRTemp swapped = newTemp(Ity_I32);
1097    /* Fish FPSCR[23:22] out, and slide to bottom.  Doesn't matter that
1098       we don't zero out bits 24 and above, since the assignment to
1099       'swapped' will mask them out anyway. */
1100    assign(armEncd,
1101           binop(Iop_Shr32, IRExpr_Get(OFFB_FPSCR, Ity_I32), mkU8(22)));
1102    /* Now swap them. */
1103    assign(swapped,
1104           binop(Iop_Or32,
1105                 binop(Iop_And32,
1106                       binop(Iop_Shl32, mkexpr(armEncd), mkU8(1)),
1107                       mkU32(2)),
1108                 binop(Iop_And32,
1109                       binop(Iop_Shr32, mkexpr(armEncd), mkU8(1)),
1110                       mkU32(1))
1111          ));
1112    return swapped;
1113 }
1114
1115
1116 /*------------------------------------------------------------*/
1117 /*--- Helpers for flag handling and conditional insns      ---*/
1118 /*------------------------------------------------------------*/
1119
1120 static const HChar* name_ARMCondcode ( ARMCondcode cond )
1121 {
1122    switch (cond) {
1123       case ARMCondEQ:  return "{eq}";
1124       case ARMCondNE:  return "{ne}";
1125       case ARMCondHS:  return "{hs}";  // or 'cs'
1126       case ARMCondLO:  return "{lo}";  // or 'cc'
1127       case ARMCondMI:  return "{mi}";
1128       case ARMCondPL:  return "{pl}";
1129       case ARMCondVS:  return "{vs}";
1130       case ARMCondVC:  return "{vc}";
1131       case ARMCondHI:  return "{hi}";
1132       case ARMCondLS:  return "{ls}";
1133       case ARMCondGE:  return "{ge}";
1134       case ARMCondLT:  return "{lt}";
1135       case ARMCondGT:  return "{gt}";
1136       case ARMCondLE:  return "{le}";
1137       case ARMCondAL:  return ""; // {al}: is the default
1138       case ARMCondNV:  return "{nv}";
1139       default: vpanic("name_ARMCondcode");
1140    }
1141 }
1142 /* and a handy shorthand for it */
1143 static const HChar* nCC ( ARMCondcode cond ) {
1144    return name_ARMCondcode(cond);
1145 }
1146
1147
1148 /* Build IR to calculate some particular condition from stored
1149    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1150    Ity_I32, suitable for narrowing.  Although the return type is
1151    Ity_I32, the returned value is either 0 or 1.  'cond' must be
1152    :: Ity_I32 and must denote the condition to compute in
1153    bits 7:4, and be zero everywhere else.
1154 */
1155 static IRExpr* mk_armg_calculate_condition_dyn ( IRExpr* cond )
1156 {
1157    vassert(typeOfIRExpr(irsb->tyenv, cond) == Ity_I32);
1158    /* And 'cond' had better produce a value in which only bits 7:4 are
1159       nonzero.  However, obviously we can't assert for that. */
1160
1161    /* So what we're constructing for the first argument is
1162       "(cond << 4) | stored-operation".
1163       However, as per comments above, 'cond' must be supplied
1164       pre-shifted to this function.
1165
1166       This pairing scheme requires that the ARM_CC_OP_ values all fit
1167       in 4 bits.  Hence we are passing a (COND, OP) pair in the lowest
1168       8 bits of the first argument. */
1169    IRExpr** args
1170       = mkIRExprVec_4(
1171            binop(Iop_Or32, IRExpr_Get(OFFB_CC_OP, Ity_I32), cond),
1172            IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1173            IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1174            IRExpr_Get(OFFB_CC_NDEP, Ity_I32)
1175         );
1176    IRExpr* call
1177       = mkIRExprCCall(
1178            Ity_I32,
1179            0/*regparm*/,
1180            "armg_calculate_condition", &armg_calculate_condition,
1181            args
1182         );
1183
1184    /* Exclude the requested condition, OP and NDEP from definedness
1185       checking.  We're only interested in DEP1 and DEP2. */
1186    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1187    return call;
1188 }
1189
1190
1191 /* Build IR to calculate some particular condition from stored
1192    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression of type
1193    Ity_I32, suitable for narrowing.  Although the return type is
1194    Ity_I32, the returned value is either 0 or 1.
1195 */
1196 static IRExpr* mk_armg_calculate_condition ( ARMCondcode cond )
1197 {
1198   /* First arg is "(cond << 4) | condition".  This requires that the
1199      ARM_CC_OP_ values all fit in 4 bits.  Hence we are passing a
1200      (COND, OP) pair in the lowest 8 bits of the first argument. */
1201    vassert(cond >= 0 && cond <= 15);
1202    return mk_armg_calculate_condition_dyn( mkU32(cond << 4) );
1203 }
1204
1205
1206 /* Build IR to calculate just the carry flag from stored
1207    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1208    Ity_I32. */
1209 static IRExpr* mk_armg_calculate_flag_c ( void )
1210 {
1211    IRExpr** args
1212       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1213                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1214                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1215                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1216    IRExpr* call
1217       = mkIRExprCCall(
1218            Ity_I32,
1219            0/*regparm*/,
1220            "armg_calculate_flag_c", &armg_calculate_flag_c,
1221            args
1222         );
1223    /* Exclude OP and NDEP from definedness checking.  We're only
1224       interested in DEP1 and DEP2. */
1225    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1226    return call;
1227 }
1228
1229
1230 /* Build IR to calculate just the overflow flag from stored
1231    CC_OP/CC_DEP1/CC_DEP2/CC_NDEP.  Returns an expression ::
1232    Ity_I32. */
1233 static IRExpr* mk_armg_calculate_flag_v ( void )
1234 {
1235    IRExpr** args
1236       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1237                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1238                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1239                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1240    IRExpr* call
1241       = mkIRExprCCall(
1242            Ity_I32,
1243            0/*regparm*/,
1244            "armg_calculate_flag_v", &armg_calculate_flag_v,
1245            args
1246         );
1247    /* Exclude OP and NDEP from definedness checking.  We're only
1248       interested in DEP1 and DEP2. */
1249    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1250    return call;
1251 }
1252
1253
1254 /* Build IR to calculate N Z C V in bits 31:28 of the
1255    returned word. */
1256 static IRExpr* mk_armg_calculate_flags_nzcv ( void )
1257 {
1258    IRExpr** args
1259       = mkIRExprVec_4( IRExpr_Get(OFFB_CC_OP,   Ity_I32),
1260                        IRExpr_Get(OFFB_CC_DEP1, Ity_I32),
1261                        IRExpr_Get(OFFB_CC_DEP2, Ity_I32),
1262                        IRExpr_Get(OFFB_CC_NDEP, Ity_I32) );
1263    IRExpr* call
1264       = mkIRExprCCall(
1265            Ity_I32,
1266            0/*regparm*/,
1267            "armg_calculate_flags_nzcv", &armg_calculate_flags_nzcv,
1268            args
1269         );
1270    /* Exclude OP and NDEP from definedness checking.  We're only
1271       interested in DEP1 and DEP2. */
1272    call->Iex.CCall.cee->mcx_mask = (1<<0) | (1<<3);
1273    return call;
1274 }
1275
1276 static IRExpr* mk_armg_calculate_flag_qc ( IRExpr* resL, IRExpr* resR, Bool Q )
1277 {
1278    IRExpr** args1;
1279    IRExpr** args2;
1280    IRExpr *call1, *call2, *res;
1281
1282    if (Q) {
1283       args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(0)),
1284                               binop(Iop_GetElem32x4, resL, mkU8(1)),
1285                               binop(Iop_GetElem32x4, resR, mkU8(0)),
1286                               binop(Iop_GetElem32x4, resR, mkU8(1)) );
1287       args2 = mkIRExprVec_4 ( binop(Iop_GetElem32x4, resL, mkU8(2)),
1288                               binop(Iop_GetElem32x4, resL, mkU8(3)),
1289                               binop(Iop_GetElem32x4, resR, mkU8(2)),
1290                               binop(Iop_GetElem32x4, resR, mkU8(3)) );
1291    } else {
1292       args1 = mkIRExprVec_4 ( binop(Iop_GetElem32x2, resL, mkU8(0)),
1293                               binop(Iop_GetElem32x2, resL, mkU8(1)),
1294                               binop(Iop_GetElem32x2, resR, mkU8(0)),
1295                               binop(Iop_GetElem32x2, resR, mkU8(1)) );
1296    }
1297
1298    call1 = mkIRExprCCall(
1299              Ity_I32,
1300              0/*regparm*/,
1301              "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1302              args1
1303           );
1304    if (Q) {
1305       call2 = mkIRExprCCall(
1306                 Ity_I32,
1307                 0/*regparm*/,
1308                 "armg_calculate_flag_qc", &armg_calculate_flag_qc,
1309                 args2
1310              );
1311    }
1312    if (Q) {
1313       res = binop(Iop_Or32, call1, call2);
1314    } else {
1315       res = call1;
1316    }
1317    return res;
1318 }
1319
1320 // FIXME: this is named wrongly .. looks like a sticky set of
1321 // QC, not a write to it.
1322 static void setFlag_QC ( IRExpr* resL, IRExpr* resR, Bool Q,
1323                          IRTemp condT )
1324 {
1325    putMiscReg32 (OFFB_FPSCR,
1326                  binop(Iop_Or32,
1327                        IRExpr_Get(OFFB_FPSCR, Ity_I32),
1328                        binop(Iop_Shl32,
1329                              mk_armg_calculate_flag_qc(resL, resR, Q),
1330                              mkU8(27))),
1331                  condT);
1332 }
1333
1334 /* Build IR to conditionally set the flags thunk.  As with putIReg, if
1335    guard is IRTemp_INVALID then it's unconditional, else it holds a
1336    condition :: Ity_I32. */
1337 static
1338 void setFlags_D1_D2_ND ( UInt cc_op, IRTemp t_dep1,
1339                          IRTemp t_dep2, IRTemp t_ndep,
1340                          IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1341 {
1342    vassert(typeOfIRTemp(irsb->tyenv, t_dep1 == Ity_I32));
1343    vassert(typeOfIRTemp(irsb->tyenv, t_dep2 == Ity_I32));
1344    vassert(typeOfIRTemp(irsb->tyenv, t_ndep == Ity_I32));
1345    vassert(cc_op >= ARMG_CC_OP_COPY && cc_op < ARMG_CC_OP_NUMBER);
1346    if (guardT == IRTemp_INVALID) {
1347       /* unconditional */
1348       stmt( IRStmt_Put( OFFB_CC_OP,   mkU32(cc_op) ));
1349       stmt( IRStmt_Put( OFFB_CC_DEP1, mkexpr(t_dep1) ));
1350       stmt( IRStmt_Put( OFFB_CC_DEP2, mkexpr(t_dep2) ));
1351       stmt( IRStmt_Put( OFFB_CC_NDEP, mkexpr(t_ndep) ));
1352    } else {
1353       /* conditional */
1354       IRTemp c1 = newTemp(Ity_I1);
1355       assign( c1, binop(Iop_CmpNE32, mkexpr(guardT), mkU32(0)) );
1356       stmt( IRStmt_Put(
1357                OFFB_CC_OP,
1358                IRExpr_ITE( mkexpr(c1),
1359                            mkU32(cc_op),
1360                            IRExpr_Get(OFFB_CC_OP, Ity_I32) ) ));
1361       stmt( IRStmt_Put(
1362                OFFB_CC_DEP1,
1363                IRExpr_ITE( mkexpr(c1),
1364                            mkexpr(t_dep1),
1365                            IRExpr_Get(OFFB_CC_DEP1, Ity_I32) ) ));
1366       stmt( IRStmt_Put(
1367                OFFB_CC_DEP2,
1368                IRExpr_ITE( mkexpr(c1),
1369                            mkexpr(t_dep2),
1370                            IRExpr_Get(OFFB_CC_DEP2, Ity_I32) ) ));
1371       stmt( IRStmt_Put(
1372                OFFB_CC_NDEP,
1373                IRExpr_ITE( mkexpr(c1),
1374                            mkexpr(t_ndep),
1375                            IRExpr_Get(OFFB_CC_NDEP, Ity_I32) ) ));
1376    }
1377 }
1378
1379
1380 /* Minor variant of the above that sets NDEP to zero (if it
1381    sets it at all) */
1382 static void setFlags_D1_D2 ( UInt cc_op, IRTemp t_dep1,
1383                              IRTemp t_dep2,
1384                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1385 {
1386    IRTemp z32 = newTemp(Ity_I32);
1387    assign( z32, mkU32(0) );
1388    setFlags_D1_D2_ND( cc_op, t_dep1, t_dep2, z32, guardT );
1389 }
1390
1391
1392 /* Minor variant of the above that sets DEP2 to zero (if it
1393    sets it at all) */
1394 static void setFlags_D1_ND ( UInt cc_op, IRTemp t_dep1,
1395                              IRTemp t_ndep,
1396                              IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1397 {
1398    IRTemp z32 = newTemp(Ity_I32);
1399    assign( z32, mkU32(0) );
1400    setFlags_D1_D2_ND( cc_op, t_dep1, z32, t_ndep, guardT );
1401 }
1402
1403
1404 /* Minor variant of the above that sets DEP2 and NDEP to zero (if it
1405    sets them at all) */
1406 static void setFlags_D1 ( UInt cc_op, IRTemp t_dep1,
1407                           IRTemp guardT /* :: Ity_I32, 0 or 1 */ )
1408 {
1409    IRTemp z32 = newTemp(Ity_I32);
1410    assign( z32, mkU32(0) );
1411    setFlags_D1_D2_ND( cc_op, t_dep1, z32, z32, guardT );
1412 }
1413
1414
1415 /* ARM only */
1416 /* Generate a side-exit to the next instruction, if the given guard
1417    expression :: Ity_I32 is 0 (note!  the side exit is taken if the
1418    condition is false!)  This is used to skip over conditional
1419    instructions which we can't generate straight-line code for, either
1420    because they are too complex or (more likely) they potentially
1421    generate exceptions.
1422 */
1423 static void mk_skip_over_A32_if_cond_is_false (
1424                IRTemp guardT /* :: Ity_I32, 0 or 1 */
1425             )
1426 {
1427    ASSERT_IS_ARM;
1428    vassert(guardT != IRTemp_INVALID);
1429    vassert(0 == (guest_R15_curr_instr_notENC & 3));
1430    stmt( IRStmt_Exit(
1431             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1432             Ijk_Boring,
1433             IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
1434             OFFB_R15T
1435        ));
1436 }
1437
1438 /* Thumb16 only */
1439 /* ditto, but jump over a 16-bit thumb insn */
1440 static void mk_skip_over_T16_if_cond_is_false (
1441                IRTemp guardT /* :: Ity_I32, 0 or 1 */
1442             )
1443 {
1444    ASSERT_IS_THUMB;
1445    vassert(guardT != IRTemp_INVALID);
1446    vassert(0 == (guest_R15_curr_instr_notENC & 1));
1447    stmt( IRStmt_Exit(
1448             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1449             Ijk_Boring,
1450             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
1451             OFFB_R15T
1452        ));
1453 }
1454
1455
1456 /* Thumb32 only */
1457 /* ditto, but jump over a 32-bit thumb insn */
1458 static void mk_skip_over_T32_if_cond_is_false (
1459                IRTemp guardT /* :: Ity_I32, 0 or 1 */
1460             )
1461 {
1462    ASSERT_IS_THUMB;
1463    vassert(guardT != IRTemp_INVALID);
1464    vassert(0 == (guest_R15_curr_instr_notENC & 1));
1465    stmt( IRStmt_Exit(
1466             unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
1467             Ijk_Boring,
1468             IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
1469             OFFB_R15T
1470        ));
1471 }
1472
1473
1474 /* Thumb16 and Thumb32 only
1475    Generate a SIGILL followed by a restart of the current instruction
1476    if the given temp is nonzero. */
1477 static void gen_SIGILL_T_if_nonzero ( IRTemp t /* :: Ity_I32 */ )
1478 {
1479    ASSERT_IS_THUMB;
1480    vassert(t != IRTemp_INVALID);
1481    vassert(0 == (guest_R15_curr_instr_notENC & 1));
1482    stmt(
1483       IRStmt_Exit(
1484          binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
1485          Ijk_NoDecode,
1486          IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
1487          OFFB_R15T
1488       )
1489    );
1490 }
1491
1492
1493 /* Inspect the old_itstate, and generate a SIGILL if it indicates that
1494    we are currently in an IT block and are not the last in the block.
1495    This also rolls back guest_ITSTATE to its old value before the exit
1496    and restores it to its new value afterwards.  This is so that if
1497    the exit is taken, we have an up to date version of ITSTATE
1498    available.  Without doing that, we have no hope of making precise
1499    exceptions work. */
1500 static void gen_SIGILL_T_if_in_but_NLI_ITBlock (
1501                IRTemp old_itstate /* :: Ity_I32 */,
1502                IRTemp new_itstate /* :: Ity_I32 */
1503             )
1504 {
1505    ASSERT_IS_THUMB;
1506    put_ITSTATE(old_itstate); // backout
1507    IRTemp guards_for_next3 = newTemp(Ity_I32);
1508    assign(guards_for_next3,
1509           binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
1510    gen_SIGILL_T_if_nonzero(guards_for_next3);
1511    put_ITSTATE(new_itstate); //restore
1512 }
1513
1514
1515 /* Simpler version of the above, which generates a SIGILL if
1516    we're anywhere within an IT block. */
1517 static void gen_SIGILL_T_if_in_ITBlock (
1518                IRTemp old_itstate /* :: Ity_I32 */,
1519                IRTemp new_itstate /* :: Ity_I32 */
1520             )
1521 {
1522    put_ITSTATE(old_itstate); // backout
1523    gen_SIGILL_T_if_nonzero(old_itstate);
1524    put_ITSTATE(new_itstate); //restore
1525 }
1526
1527
1528 /* Generate an APSR value, from the NZCV thunk, and
1529    from QFLAG32 and GEFLAG0 .. GEFLAG3. */
1530 static IRTemp synthesise_APSR ( void )
1531 {
1532    IRTemp res1 = newTemp(Ity_I32);
1533    // Get NZCV
1534    assign( res1, mk_armg_calculate_flags_nzcv() );
1535    // OR in the Q value
1536    IRTemp res2 = newTemp(Ity_I32);
1537    assign(
1538       res2,
1539       binop(Iop_Or32,
1540             mkexpr(res1),
1541             binop(Iop_Shl32,
1542                   unop(Iop_1Uto32,
1543                        binop(Iop_CmpNE32,
1544                              mkexpr(get_QFLAG32()),
1545                              mkU32(0))),
1546                   mkU8(ARMG_CC_SHIFT_Q)))
1547    );
1548    // OR in GE0 .. GE3
1549    IRExpr* ge0
1550       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(0), mkU32(0)));
1551    IRExpr* ge1
1552       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(1), mkU32(0)));
1553    IRExpr* ge2
1554       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(2), mkU32(0)));
1555    IRExpr* ge3
1556       = unop(Iop_1Uto32, binop(Iop_CmpNE32, get_GEFLAG32(3), mkU32(0)));
1557    IRTemp res3 = newTemp(Ity_I32);
1558    assign(res3,
1559           binop(Iop_Or32,
1560                 mkexpr(res2),
1561                 binop(Iop_Or32,
1562                       binop(Iop_Or32,
1563                             binop(Iop_Shl32, ge0, mkU8(16)),
1564                             binop(Iop_Shl32, ge1, mkU8(17))),
1565                       binop(Iop_Or32,
1566                             binop(Iop_Shl32, ge2, mkU8(18)),
1567                             binop(Iop_Shl32, ge3, mkU8(19))) )));
1568    return res3;
1569 }
1570
1571
1572 /* and the inverse transformation: given an APSR value,
1573    set the NZCV thunk, the Q flag, and the GE flags. */
1574 static void desynthesise_APSR ( Bool write_nzcvq, Bool write_ge,
1575                                 IRTemp apsrT, IRTemp condT )
1576 {
1577    vassert(write_nzcvq || write_ge);
1578    if (write_nzcvq) {
1579       // Do NZCV
1580       IRTemp immT = newTemp(Ity_I32);
1581       assign(immT, binop(Iop_And32, mkexpr(apsrT), mkU32(0xF0000000)) );
1582       setFlags_D1(ARMG_CC_OP_COPY, immT, condT);
1583       // Do Q
1584       IRTemp qnewT = newTemp(Ity_I32);
1585       assign(qnewT, binop(Iop_And32, mkexpr(apsrT), mkU32(ARMG_CC_MASK_Q)));
1586       put_QFLAG32(qnewT, condT);
1587    }
1588    if (write_ge) {
1589       // Do GE3..0
1590       put_GEFLAG32(0, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<16)),
1591                    condT);
1592       put_GEFLAG32(1, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<17)),
1593                    condT);
1594       put_GEFLAG32(2, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<18)),
1595                    condT);
1596       put_GEFLAG32(3, 0, binop(Iop_And32, mkexpr(apsrT), mkU32(1<<19)),
1597                    condT);
1598    }
1599 }
1600
1601
1602 /*------------------------------------------------------------*/
1603 /*--- Helpers for saturation                               ---*/
1604 /*------------------------------------------------------------*/
1605
1606 /* FIXME: absolutely the only diff. between (a) armUnsignedSatQ and
1607    (b) armSignedSatQ is that in (a) the floor is set to 0, whereas in
1608    (b) the floor is computed from the value of imm5.  these two fnsn
1609    should be commoned up. */
1610
1611 /* UnsignedSatQ(): 'clamp' each value so it lies between 0 <= x <= (2^N)-1
1612    Optionally return flag resQ saying whether saturation occurred.
1613    See definition in manual, section A2.2.1, page 41
1614    (bits(N), boolean) UnsignedSatQ( integer i, integer N )
1615    {
1616      if ( i > (2^N)-1 ) { result = (2^N)-1; saturated = TRUE; }
1617      elsif ( i < 0 )    { result = 0; saturated = TRUE; }
1618      else               { result = i; saturated = FALSE; }
1619      return ( result<N-1:0>, saturated );
1620    }
1621 */
1622 static void armUnsignedSatQ( IRTemp* res,  /* OUT - Ity_I32 */
1623                              IRTemp* resQ, /* OUT - Ity_I32  */
1624                              IRTemp regT,  /* value to clamp - Ity_I32 */
1625                              UInt imm5 )   /* saturation ceiling */
1626 {
1627    ULong ceil64  = (1ULL << imm5) - 1;    // (2^imm5)-1
1628    UInt  ceil    = (UInt)ceil64;
1629    UInt  floor   = 0;
1630
1631    IRTemp nd0 = newTemp(Ity_I32);
1632    IRTemp nd1 = newTemp(Ity_I32);
1633    IRTemp nd2 = newTemp(Ity_I1);
1634    IRTemp nd3 = newTemp(Ity_I32);
1635    IRTemp nd4 = newTemp(Ity_I32);
1636    IRTemp nd5 = newTemp(Ity_I1);
1637    IRTemp nd6 = newTemp(Ity_I32);
1638
1639    assign( nd0, mkexpr(regT) );
1640    assign( nd1, mkU32(ceil) );
1641    assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1642    assign( nd3, IRExpr_ITE(mkexpr(nd2), mkexpr(nd1), mkexpr(nd0)) );
1643    assign( nd4, mkU32(floor) );
1644    assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1645    assign( nd6, IRExpr_ITE(mkexpr(nd5), mkexpr(nd4), mkexpr(nd3)) );
1646    assign( *res, mkexpr(nd6) );
1647
1648    /* if saturation occurred, then resQ is set to some nonzero value
1649       if sat did not occur, resQ is guaranteed to be zero. */
1650    if (resQ) {
1651       assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1652    }
1653 }
1654
1655
1656 /* SignedSatQ(): 'clamp' each value so it lies between  -2^N <= x <= (2^N) - 1
1657    Optionally return flag resQ saying whether saturation occurred.
1658    - see definition in manual, section A2.2.1, page 41
1659    (bits(N), boolean ) SignedSatQ( integer i, integer N )
1660    {
1661      if ( i > 2^(N-1) - 1 )    { result = 2^(N-1) - 1; saturated = TRUE; }
1662      elsif ( i < -(2^(N-1)) )  { result = -(2^(N-1));  saturated = FALSE; }
1663      else                      { result = i;           saturated = FALSE; }
1664      return ( result[N-1:0], saturated );
1665    }
1666 */
1667 static void armSignedSatQ( IRTemp regT,    /* value to clamp - Ity_I32 */
1668                            UInt imm5,      /* saturation ceiling */
1669                            IRTemp* res,    /* OUT - Ity_I32 */
1670                            IRTemp* resQ )  /* OUT - Ity_I32  */
1671 {
1672    Long ceil64  =  (1LL << (imm5-1)) - 1;  //  (2^(imm5-1))-1
1673    Long floor64 = -(1LL << (imm5-1));      // -(2^(imm5-1))
1674    Int  ceil    = (Int)ceil64;
1675    Int  floor   = (Int)floor64;
1676
1677    IRTemp nd0 = newTemp(Ity_I32);
1678    IRTemp nd1 = newTemp(Ity_I32);
1679    IRTemp nd2 = newTemp(Ity_I1);
1680    IRTemp nd3 = newTemp(Ity_I32);
1681    IRTemp nd4 = newTemp(Ity_I32);
1682    IRTemp nd5 = newTemp(Ity_I1);
1683    IRTemp nd6 = newTemp(Ity_I32);
1684
1685    assign( nd0, mkexpr(regT) );
1686    assign( nd1, mkU32(ceil) );
1687    assign( nd2, binop( Iop_CmpLT32S, mkexpr(nd1), mkexpr(nd0) ) );
1688    assign( nd3, IRExpr_ITE( mkexpr(nd2), mkexpr(nd1), mkexpr(nd0) ) );
1689    assign( nd4, mkU32(floor) );
1690    assign( nd5, binop( Iop_CmpLT32S, mkexpr(nd3), mkexpr(nd4) ) );
1691    assign( nd6, IRExpr_ITE( mkexpr(nd5), mkexpr(nd4), mkexpr(nd3) ) );
1692    assign( *res, mkexpr(nd6) );
1693
1694    /* if saturation occurred, then resQ is set to some nonzero value
1695       if sat did not occur, resQ is guaranteed to be zero. */
1696    if (resQ) {
1697      assign( *resQ, binop(Iop_Xor32, mkexpr(*res), mkexpr(regT)) );
1698    }
1699 }
1700
1701
1702 /* Compute a value 0 :: I32 or 1 :: I32, indicating whether signed
1703    overflow occurred for 32-bit addition.  Needs both args and the
1704    result.  HD p27. */
1705 static
1706 IRExpr* signed_overflow_after_Add32 ( IRExpr* resE,
1707                                       IRTemp argL, IRTemp argR )
1708 {
1709    IRTemp res = newTemp(Ity_I32);
1710    assign(res, resE);
1711    return
1712       binop( Iop_Shr32,
1713              binop( Iop_And32,
1714                     binop( Iop_Xor32, mkexpr(res), mkexpr(argL) ),
1715                     binop( Iop_Xor32, mkexpr(res), mkexpr(argR) )),
1716              mkU8(31) );
1717 }
1718
1719 /* Similarly .. also from HD p27 .. */
1720 static
1721 IRExpr* signed_overflow_after_Sub32 ( IRExpr* resE,
1722                                       IRTemp argL, IRTemp argR )
1723 {
1724    IRTemp res = newTemp(Ity_I32);
1725    assign(res, resE);
1726    return
1727       binop( Iop_Shr32,
1728              binop( Iop_And32,
1729                     binop( Iop_Xor32, mkexpr(argL), mkexpr(argR) ),
1730                     binop( Iop_Xor32, mkexpr(res),  mkexpr(argL) )),
1731              mkU8(31) );
1732 }
1733
1734
1735 /*------------------------------------------------------------*/
1736 /*--- Larger helpers                                       ---*/
1737 /*------------------------------------------------------------*/
1738
1739 /* Compute both the result and new C flag value for a LSL by an imm5
1740    or by a register operand.  May generate reads of the old C value
1741    (hence only safe to use before any writes to guest state happen).
1742    Are factored out so can be used by both ARM and Thumb.
1743
1744    Note that in compute_result_and_C_after_{LSL,LSR,ASR}_by{imm5,reg},
1745    "res" (the result)  is a.k.a. "shop", shifter operand
1746    "newC" (the new C)  is a.k.a. "shco", shifter carry out
1747
1748    The calling convention for res and newC is a bit funny.  They could
1749    be passed by value, but instead are passed by ref.
1750
1751    The C (shco) value computed must be zero in bits 31:1, as the IR
1752    optimisations for flag handling (guest_arm_spechelper) rely on
1753    that, and the slow-path handlers (armg_calculate_flags_nzcv) assert
1754    for it.  Same applies to all these functions that compute shco
1755    after a shift or rotate, not just this one.
1756 */
1757
1758 static void compute_result_and_C_after_LSL_by_imm5 (
1759                /*OUT*/HChar* buf,
1760                IRTemp* res,
1761                IRTemp* newC,
1762                IRTemp rMt, UInt shift_amt, /* operands */
1763                UInt rM      /* only for debug printing */
1764             )
1765 {
1766    if (shift_amt == 0) {
1767       if (newC) {
1768          assign( *newC, mk_armg_calculate_flag_c() );
1769       }
1770       assign( *res, mkexpr(rMt) );
1771       DIS(buf, "r%u", rM);
1772    } else {
1773       vassert(shift_amt >= 1 && shift_amt <= 31);
1774       if (newC) {
1775          assign( *newC,
1776                  binop(Iop_And32,
1777                        binop(Iop_Shr32, mkexpr(rMt),
1778                                         mkU8(32 - shift_amt)),
1779                        mkU32(1)));
1780       }
1781       assign( *res,
1782               binop(Iop_Shl32, mkexpr(rMt), mkU8(shift_amt)) );
1783       DIS(buf, "r%u, LSL #%u", rM, shift_amt);
1784    }
1785 }
1786
1787
1788 static void compute_result_and_C_after_LSL_by_reg (
1789                /*OUT*/HChar* buf,
1790                IRTemp* res,
1791                IRTemp* newC,
1792                IRTemp rMt, IRTemp rSt,  /* operands */
1793                UInt rM,    UInt rS      /* only for debug printing */
1794             )
1795 {
1796    // shift left in range 0 .. 255
1797    // amt  = rS & 255
1798    // res  = amt < 32 ?  Rm << amt  : 0
1799    // newC = amt == 0     ? oldC  :
1800    //        amt in 1..32 ?  Rm[32-amt]  : 0
1801    IRTemp amtT = newTemp(Ity_I32);
1802    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1803    if (newC) {
1804       /* mux0X(amt == 0,
1805                mux0X(amt < 32,
1806                      0,
1807                      Rm[(32-amt) & 31]),
1808                oldC)
1809       */
1810       /* About the best you can do is pray that iropt is able
1811          to nuke most or all of the following junk. */
1812       IRTemp oldC = newTemp(Ity_I32);
1813       assign(oldC, mk_armg_calculate_flag_c() );
1814       assign(
1815          *newC,
1816          IRExpr_ITE(
1817             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1818             mkexpr(oldC),
1819             IRExpr_ITE(
1820                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1821                binop(Iop_And32,
1822                      binop(Iop_Shr32,
1823                            mkexpr(rMt),
1824                            unop(Iop_32to8,
1825                                 binop(Iop_And32,
1826                                       binop(Iop_Sub32,
1827                                             mkU32(32),
1828                                             mkexpr(amtT)),
1829                                       mkU32(31)
1830                                 )
1831                            )
1832                      ),
1833                      mkU32(1)
1834                      ),
1835                mkU32(0)
1836             )
1837          )
1838       );
1839    }
1840    // (Rm << (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1841    // Lhs of the & limits the shift to 31 bits, so as to
1842    // give known IR semantics.  Rhs of the & is all 1s for
1843    // Rs <= 31 and all 0s for Rs >= 32.
1844    assign(
1845       *res,
1846       binop(
1847          Iop_And32,
1848          binop(Iop_Shl32,
1849                mkexpr(rMt),
1850                unop(Iop_32to8,
1851                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1852          binop(Iop_Sar32,
1853                binop(Iop_Sub32,
1854                      mkexpr(amtT),
1855                      mkU32(32)),
1856                mkU8(31))));
1857     DIS(buf, "r%u, LSL r%u", rM, rS);
1858 }
1859
1860
1861 static void compute_result_and_C_after_LSR_by_imm5 (
1862                /*OUT*/HChar* buf,
1863                IRTemp* res,
1864                IRTemp* newC,
1865                IRTemp rMt, UInt shift_amt, /* operands */
1866                UInt rM      /* only for debug printing */
1867             )
1868 {
1869    if (shift_amt == 0) {
1870       // conceptually a 32-bit shift, however:
1871       // res  = 0
1872       // newC = Rm[31]
1873       if (newC) {
1874          assign( *newC,
1875                  binop(Iop_And32,
1876                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1877                        mkU32(1)));
1878       }
1879       assign( *res, mkU32(0) );
1880       DIS(buf, "r%u, LSR #0(a.k.a. 32)", rM);
1881    } else {
1882       // shift in range 1..31
1883       // res  = Rm >>u shift_amt
1884       // newC = Rm[shift_amt - 1]
1885       vassert(shift_amt >= 1 && shift_amt <= 31);
1886       if (newC) {
1887          assign( *newC,
1888                  binop(Iop_And32,
1889                        binop(Iop_Shr32, mkexpr(rMt),
1890                                         mkU8(shift_amt - 1)),
1891                        mkU32(1)));
1892       }
1893       assign( *res,
1894               binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)) );
1895       DIS(buf, "r%u, LSR #%u", rM, shift_amt);
1896    }
1897 }
1898
1899
1900 static void compute_result_and_C_after_LSR_by_reg (
1901                /*OUT*/HChar* buf,
1902                IRTemp* res,
1903                IRTemp* newC,
1904                IRTemp rMt, IRTemp rSt,  /* operands */
1905                UInt rM,    UInt rS      /* only for debug printing */
1906             )
1907 {
1908    // shift right in range 0 .. 255
1909    // amt = rS & 255
1910    // res  = amt < 32 ?  Rm >>u amt  : 0
1911    // newC = amt == 0     ? oldC  :
1912    //        amt in 1..32 ?  Rm[amt-1]  : 0
1913    IRTemp amtT = newTemp(Ity_I32);
1914    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
1915    if (newC) {
1916       /* mux0X(amt == 0,
1917                mux0X(amt < 32,
1918                      0,
1919                      Rm[(amt-1) & 31]),
1920                oldC)
1921       */
1922       IRTemp oldC = newTemp(Ity_I32);
1923       assign(oldC, mk_armg_calculate_flag_c() );
1924       assign(
1925          *newC,
1926          IRExpr_ITE(
1927             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
1928             mkexpr(oldC),
1929             IRExpr_ITE(
1930                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
1931                binop(Iop_And32,
1932                      binop(Iop_Shr32,
1933                            mkexpr(rMt),
1934                            unop(Iop_32to8,
1935                                 binop(Iop_And32,
1936                                       binop(Iop_Sub32,
1937                                             mkexpr(amtT),
1938                                             mkU32(1)),
1939                                       mkU32(31)
1940                                 )
1941                            )
1942                      ),
1943                      mkU32(1)
1944                      ),
1945                mkU32(0)
1946             )
1947          )
1948       );
1949    }
1950    // (Rm >>u (Rs & 31))  &  (((Rs & 255) - 32) >>s 31)
1951    // Lhs of the & limits the shift to 31 bits, so as to
1952    // give known IR semantics.  Rhs of the & is all 1s for
1953    // Rs <= 31 and all 0s for Rs >= 32.
1954    assign(
1955       *res,
1956       binop(
1957          Iop_And32,
1958          binop(Iop_Shr32,
1959                mkexpr(rMt),
1960                unop(Iop_32to8,
1961                     binop(Iop_And32, mkexpr(rSt), mkU32(31)))),
1962          binop(Iop_Sar32,
1963                binop(Iop_Sub32,
1964                      mkexpr(amtT),
1965                      mkU32(32)),
1966                mkU8(31))));
1967     DIS(buf, "r%u, LSR r%u", rM, rS);
1968 }
1969
1970
1971 static void compute_result_and_C_after_ASR_by_imm5 (
1972                /*OUT*/HChar* buf,
1973                IRTemp* res,
1974                IRTemp* newC,
1975                IRTemp rMt, UInt shift_amt, /* operands */
1976                UInt rM      /* only for debug printing */
1977             )
1978 {
1979    if (shift_amt == 0) {
1980       // conceptually a 32-bit shift, however:
1981       // res  = Rm >>s 31
1982       // newC = Rm[31]
1983       if (newC) {
1984          assign( *newC,
1985                  binop(Iop_And32,
1986                        binop(Iop_Shr32, mkexpr(rMt), mkU8(31)),
1987                        mkU32(1)));
1988       }
1989       assign( *res, binop(Iop_Sar32, mkexpr(rMt), mkU8(31)) );
1990       DIS(buf, "r%u, ASR #0(a.k.a. 32)", rM);
1991    } else {
1992       // shift in range 1..31
1993       // res = Rm >>s shift_amt
1994       // newC = Rm[shift_amt - 1]
1995       vassert(shift_amt >= 1 && shift_amt <= 31);
1996       if (newC) {
1997          assign( *newC,
1998                  binop(Iop_And32,
1999                        binop(Iop_Shr32, mkexpr(rMt),
2000                                         mkU8(shift_amt - 1)),
2001                        mkU32(1)));
2002       }
2003       assign( *res,
2004               binop(Iop_Sar32, mkexpr(rMt), mkU8(shift_amt)) );
2005       DIS(buf, "r%u, ASR #%u", rM, shift_amt);
2006    }
2007 }
2008
2009
2010 static void compute_result_and_C_after_ASR_by_reg (
2011                /*OUT*/HChar* buf,
2012                IRTemp* res,
2013                IRTemp* newC,
2014                IRTemp rMt, IRTemp rSt,  /* operands */
2015                UInt rM,    UInt rS      /* only for debug printing */
2016             )
2017 {
2018    // arithmetic shift right in range 0 .. 255
2019    // amt = rS & 255
2020    // res  = amt < 32 ?  Rm >>s amt  : Rm >>s 31
2021    // newC = amt == 0     ? oldC  :
2022    //        amt in 1..32 ?  Rm[amt-1]  : Rm[31]
2023    IRTemp amtT = newTemp(Ity_I32);
2024    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2025    if (newC) {
2026       /* mux0X(amt == 0,
2027                mux0X(amt < 32,
2028                      Rm[31],
2029                      Rm[(amt-1) & 31])
2030                oldC)
2031       */
2032       IRTemp oldC = newTemp(Ity_I32);
2033       assign(oldC, mk_armg_calculate_flag_c() );
2034       assign(
2035          *newC,
2036          IRExpr_ITE(
2037             binop(Iop_CmpEQ32, mkexpr(amtT), mkU32(0)),
2038             mkexpr(oldC),
2039             IRExpr_ITE(
2040                binop(Iop_CmpLE32U, mkexpr(amtT), mkU32(32)),
2041                binop(Iop_And32,
2042                      binop(Iop_Shr32,
2043                            mkexpr(rMt),
2044                            unop(Iop_32to8,
2045                                 binop(Iop_And32,
2046                                       binop(Iop_Sub32,
2047                                             mkexpr(amtT),
2048                                             mkU32(1)),
2049                                       mkU32(31)
2050                                 )
2051                            )
2052                      ),
2053                      mkU32(1)
2054                      ),
2055                binop(Iop_And32,
2056                      binop(Iop_Shr32,
2057                            mkexpr(rMt),
2058                            mkU8(31)
2059                      ),
2060                      mkU32(1)
2061                )
2062             )
2063          )
2064       );
2065    }
2066    // (Rm >>s (amt <u 32 ? amt : 31))
2067    assign(
2068       *res,
2069       binop(
2070          Iop_Sar32,
2071          mkexpr(rMt),
2072          unop(
2073             Iop_32to8,
2074             IRExpr_ITE(
2075                binop(Iop_CmpLT32U, mkexpr(amtT), mkU32(32)),
2076                mkexpr(amtT),
2077                mkU32(31)))));
2078     DIS(buf, "r%u, ASR r%u", rM, rS);
2079 }
2080
2081
2082 static void compute_result_and_C_after_ROR_by_reg (
2083                /*OUT*/HChar* buf,
2084                IRTemp* res,
2085                IRTemp* newC,
2086                IRTemp rMt, IRTemp rSt,  /* operands */
2087                UInt rM,    UInt rS      /* only for debug printing */
2088             )
2089 {
2090    // rotate right in range 0 .. 255
2091    // amt = rS & 255
2092    // shop =  Rm `ror` (amt & 31)
2093    // shco =  amt == 0 ? oldC : Rm[(amt-1) & 31]
2094    IRTemp amtT = newTemp(Ity_I32);
2095    assign( amtT, binop(Iop_And32, mkexpr(rSt), mkU32(255)) );
2096    IRTemp amt5T = newTemp(Ity_I32);
2097    assign( amt5T, binop(Iop_And32, mkexpr(rSt), mkU32(31)) );
2098    IRTemp oldC = newTemp(Ity_I32);
2099    assign(oldC, mk_armg_calculate_flag_c() );
2100    if (newC) {
2101       assign(
2102          *newC,
2103          IRExpr_ITE(
2104             binop(Iop_CmpNE32, mkexpr(amtT), mkU32(0)),
2105             binop(Iop_And32,
2106                   binop(Iop_Shr32,
2107                         mkexpr(rMt),
2108                         unop(Iop_32to8,
2109                              binop(Iop_And32,
2110                                    binop(Iop_Sub32,
2111                                          mkexpr(amtT),
2112                                          mkU32(1)
2113                                    ),
2114                                    mkU32(31)
2115                              )
2116                         )
2117                   ),
2118                   mkU32(1)
2119             ),
2120             mkexpr(oldC)
2121          )
2122       );
2123    }
2124    assign(
2125       *res,
2126       IRExpr_ITE(
2127          binop(Iop_CmpNE32, mkexpr(amt5T), mkU32(0)),
2128          binop(Iop_Or32,
2129                binop(Iop_Shr32,
2130                      mkexpr(rMt),
2131                      unop(Iop_32to8, mkexpr(amt5T))
2132                ),
2133                binop(Iop_Shl32,
2134                      mkexpr(rMt),
2135                      unop(Iop_32to8,
2136                           binop(Iop_Sub32, mkU32(32), mkexpr(amt5T))
2137                      )
2138                )
2139                ),
2140          mkexpr(rMt)
2141       )
2142    );
2143    DIS(buf, "r%u, ROR r#%u", rM, rS);
2144 }
2145
2146
2147 /* Generate an expression corresponding to the immediate-shift case of
2148    a shifter operand.  This is used both for ARM and Thumb2.
2149
2150    Bind it to a temporary, and return that via *res.  If newC is
2151    non-NULL, also compute a value for the shifter's carry out (in the
2152    LSB of a word), bind it to a temporary, and return that via *shco.
2153
2154    Generates GETs from the guest state and is therefore not safe to
2155    use once we start doing PUTs to it, for any given instruction.
2156
2157    'how' is encoded thusly:
2158       00b LSL,  01b LSR,  10b ASR,  11b ROR
2159    Most but not all ARM and Thumb integer insns use this encoding.
2160    Be careful to ensure the right value is passed here.
2161 */
2162 static void compute_result_and_C_after_shift_by_imm5 (
2163                /*OUT*/HChar* buf,
2164                /*OUT*/IRTemp* res,
2165                /*OUT*/IRTemp* newC,
2166                IRTemp  rMt,       /* reg to shift */
2167                UInt    how,       /* what kind of shift */
2168                UInt    shift_amt, /* shift amount (0..31) */
2169                UInt    rM         /* only for debug printing */
2170             )
2171 {
2172    vassert(shift_amt < 32);
2173    vassert(how < 4);
2174
2175    switch (how) {
2176
2177       case 0:
2178          compute_result_and_C_after_LSL_by_imm5(
2179             buf, res, newC, rMt, shift_amt, rM
2180          );
2181          break;
2182
2183       case 1:
2184          compute_result_and_C_after_LSR_by_imm5(
2185             buf, res, newC, rMt, shift_amt, rM
2186          );
2187          break;
2188
2189       case 2:
2190          compute_result_and_C_after_ASR_by_imm5(
2191             buf, res, newC, rMt, shift_amt, rM
2192          );
2193          break;
2194
2195       case 3:
2196          if (shift_amt == 0) {
2197             IRTemp oldcT = newTemp(Ity_I32);
2198             // rotate right 1 bit through carry (?)
2199             // RRX -- described at ARM ARM A5-17
2200             // res  = (oldC << 31) | (Rm >>u 1)
2201             // newC = Rm[0]
2202             if (newC) {
2203                assign( *newC,
2204                        binop(Iop_And32, mkexpr(rMt), mkU32(1)));
2205             }
2206             assign( oldcT, mk_armg_calculate_flag_c() );
2207             assign( *res,
2208                     binop(Iop_Or32,
2209                           binop(Iop_Shl32, mkexpr(oldcT), mkU8(31)),
2210                           binop(Iop_Shr32, mkexpr(rMt), mkU8(1))) );
2211             DIS(buf, "r%u, RRX", rM);
2212          } else {
2213             // rotate right in range 1..31
2214             // res  = Rm `ror` shift_amt
2215             // newC = Rm[shift_amt - 1]
2216             vassert(shift_amt >= 1 && shift_amt <= 31);
2217             if (newC) {
2218                assign( *newC,
2219                        binop(Iop_And32,
2220                              binop(Iop_Shr32, mkexpr(rMt),
2221                                               mkU8(shift_amt - 1)),
2222                              mkU32(1)));
2223             }
2224             assign( *res,
2225                     binop(Iop_Or32,
2226                           binop(Iop_Shr32, mkexpr(rMt), mkU8(shift_amt)),
2227                           binop(Iop_Shl32, mkexpr(rMt),
2228                                            mkU8(32-shift_amt))));
2229             DIS(buf, "r%u, ROR #%u", rM, shift_amt);
2230          }
2231          break;
2232
2233       default:
2234          /*NOTREACHED*/
2235          vassert(0);
2236    }
2237 }
2238
2239
2240 /* Generate an expression corresponding to the register-shift case of
2241    a shifter operand.  This is used both for ARM and Thumb2.
2242
2243    Bind it to a temporary, and return that via *res.  If newC is
2244    non-NULL, also compute a value for the shifter's carry out (in the
2245    LSB of a word), bind it to a temporary, and return that via *shco.
2246
2247    Generates GETs from the guest state and is therefore not safe to
2248    use once we start doing PUTs to it, for any given instruction.
2249
2250    'how' is encoded thusly:
2251       00b LSL,  01b LSR,  10b ASR,  11b ROR
2252    Most but not all ARM and Thumb integer insns use this encoding.
2253    Be careful to ensure the right value is passed here.
2254 */
2255 static void compute_result_and_C_after_shift_by_reg (
2256                /*OUT*/HChar*  buf,
2257                /*OUT*/IRTemp* res,
2258                /*OUT*/IRTemp* newC,
2259                IRTemp  rMt,       /* reg to shift */
2260                UInt    how,       /* what kind of shift */
2261                IRTemp  rSt,       /* shift amount */
2262                UInt    rM,        /* only for debug printing */
2263                UInt    rS         /* only for debug printing */
2264             )
2265 {
2266    vassert(how < 4);
2267    switch (how) {
2268       case 0: { /* LSL */
2269          compute_result_and_C_after_LSL_by_reg(
2270             buf, res, newC, rMt, rSt, rM, rS
2271          );
2272          break;
2273       }
2274       case 1: { /* LSR */
2275          compute_result_and_C_after_LSR_by_reg(
2276             buf, res, newC, rMt, rSt, rM, rS
2277          );
2278          break;
2279       }
2280       case 2: { /* ASR */
2281          compute_result_and_C_after_ASR_by_reg(
2282             buf, res, newC, rMt, rSt, rM, rS
2283          );
2284          break;
2285       }
2286       case 3: { /* ROR */
2287          compute_result_and_C_after_ROR_by_reg(
2288              buf, res, newC, rMt, rSt, rM, rS
2289          );
2290          break;
2291       }
2292       default:
2293          /*NOTREACHED*/
2294          vassert(0);
2295    }
2296 }
2297
2298
2299 /* Generate an expression corresponding to a shifter_operand, bind it
2300    to a temporary, and return that via *shop.  If shco is non-NULL,
2301    also compute a value for the shifter's carry out (in the LSB of a
2302    word), bind it to a temporary, and return that via *shco.
2303
2304    If for some reason we can't come up with a shifter operand (missing
2305    case?  not really a shifter operand?) return False.
2306
2307    Generates GETs from the guest state and is therefore not safe to
2308    use once we start doing PUTs to it, for any given instruction.
2309
2310    For ARM insns only; not for Thumb.
2311 */
2312 static Bool mk_shifter_operand ( UInt insn_25, UInt insn_11_0,
2313                                  /*OUT*/IRTemp* shop,
2314                                  /*OUT*/IRTemp* shco,
2315                                  /*OUT*/HChar* buf )
2316 {
2317    UInt insn_4 = (insn_11_0 >> 4) & 1;
2318    UInt insn_7 = (insn_11_0 >> 7) & 1;
2319    vassert(insn_25 <= 0x1);
2320    vassert(insn_11_0 <= 0xFFF);
2321
2322    vassert(shop && *shop == IRTemp_INVALID);
2323    *shop = newTemp(Ity_I32);
2324
2325    if (shco) {
2326       vassert(*shco == IRTemp_INVALID);
2327       *shco = newTemp(Ity_I32);
2328    }
2329
2330    /* 32-bit immediate */
2331
2332    if (insn_25 == 1) {
2333       /* immediate: (7:0) rotated right by 2 * (11:8) */
2334       UInt imm = (insn_11_0 >> 0) & 0xFF;
2335       UInt rot = 2 * ((insn_11_0 >> 8) & 0xF);
2336       vassert(rot <= 30);
2337       imm = ROR32(imm, rot);
2338       if (shco) {
2339          if (rot == 0) {
2340             assign( *shco, mk_armg_calculate_flag_c() );
2341          } else {
2342             assign( *shco, mkU32( (imm >> 31) & 1 ) );
2343          }
2344       }
2345       DIS(buf, "#0x%x", imm);
2346       assign( *shop, mkU32(imm) );
2347       return True;
2348    }
2349
2350    /* Shift/rotate by immediate */
2351
2352    if (insn_25 == 0 && insn_4 == 0) {
2353       /* Rm (3:0) shifted (6:5) by immediate (11:7) */
2354       UInt shift_amt = (insn_11_0 >> 7) & 0x1F;
2355       UInt rM        = (insn_11_0 >> 0) & 0xF;
2356       UInt how       = (insn_11_0 >> 5) & 3;
2357       /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2358       IRTemp rMt = newTemp(Ity_I32);
2359       assign(rMt, getIRegA(rM));
2360
2361       vassert(shift_amt <= 31);
2362
2363       compute_result_and_C_after_shift_by_imm5(
2364          buf, shop, shco, rMt, how, shift_amt, rM
2365       );
2366       return True;
2367    }
2368
2369    /* Shift/rotate by register */
2370    if (insn_25 == 0 && insn_4 == 1) {
2371       /* Rm (3:0) shifted (6:5) by Rs (11:8) */
2372       UInt rM  = (insn_11_0 >> 0) & 0xF;
2373       UInt rS  = (insn_11_0 >> 8) & 0xF;
2374       UInt how = (insn_11_0 >> 5) & 3;
2375       /* how: 00 = Shl, 01 = Shr, 10 = Sar, 11 = Ror */
2376       IRTemp rMt = newTemp(Ity_I32);
2377       IRTemp rSt = newTemp(Ity_I32);
2378
2379       if (insn_7 == 1)
2380          return False; /* not really a shifter operand */
2381
2382       assign(rMt, getIRegA(rM));
2383       assign(rSt, getIRegA(rS));
2384
2385       compute_result_and_C_after_shift_by_reg(
2386          buf, shop, shco, rMt, how, rSt, rM, rS
2387       );
2388       return True;
2389    }
2390
2391    vex_printf("mk_shifter_operand(0x%x,0x%x)\n", insn_25, insn_11_0 );
2392    return False;
2393 }
2394
2395
2396 /* ARM only */
2397 static
2398 IRExpr* mk_EA_reg_plusminus_imm12 ( UInt rN, UInt bU, UInt imm12,
2399                                     /*OUT*/HChar* buf )
2400 {
2401    vassert(rN < 16);
2402    vassert(bU < 2);
2403    vassert(imm12 < 0x1000);
2404    HChar opChar = bU == 1 ? '+' : '-';
2405    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm12);
2406    return
2407       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2408              getIRegA(rN),
2409              mkU32(imm12) );
2410 }
2411
2412
2413 /* ARM only.
2414    NB: This is "DecodeImmShift" in newer versions of the the ARM ARM.
2415 */
2416 static
2417 IRExpr* mk_EA_reg_plusminus_shifted_reg ( UInt rN, UInt bU, UInt rM,
2418                                           UInt sh2, UInt imm5,
2419                                           /*OUT*/HChar* buf )
2420 {
2421    vassert(rN < 16);
2422    vassert(bU < 2);
2423    vassert(rM < 16);
2424    vassert(sh2 < 4);
2425    vassert(imm5 < 32);
2426    HChar   opChar = bU == 1 ? '+' : '-';
2427    IRExpr* index  = NULL;
2428    switch (sh2) {
2429       case 0: /* LSL */
2430          /* imm5 can be in the range 0 .. 31 inclusive. */
2431          index = binop(Iop_Shl32, getIRegA(rM), mkU8(imm5));
2432          DIS(buf, "[r%u, %c r%u LSL #%u]", rN, opChar, rM, imm5);
2433          break;
2434       case 1: /* LSR */
2435          if (imm5 == 0) {
2436             index = mkU32(0);
2437             vassert(0); // ATC
2438          } else {
2439             index = binop(Iop_Shr32, getIRegA(rM), mkU8(imm5));
2440          }
2441          DIS(buf, "[r%u, %cr%u, LSR #%u]",
2442                   rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2443          break;
2444       case 2: /* ASR */
2445          /* Doesn't this just mean that the behaviour with imm5 == 0
2446             is the same as if it had been 31 ? */
2447          if (imm5 == 0) {
2448             index = binop(Iop_Sar32, getIRegA(rM), mkU8(31));
2449             vassert(0); // ATC
2450          } else {
2451             index = binop(Iop_Sar32, getIRegA(rM), mkU8(imm5));
2452          }
2453          DIS(buf, "[r%u, %cr%u, ASR #%u]",
2454                   rN, opChar, rM, imm5 == 0 ? 32 : imm5);
2455          break;
2456       case 3: /* ROR or RRX */
2457          if (imm5 == 0) {
2458             IRTemp rmT    = newTemp(Ity_I32);
2459             IRTemp cflagT = newTemp(Ity_I32);
2460             assign(rmT, getIRegA(rM));
2461             assign(cflagT, mk_armg_calculate_flag_c());
2462             index = binop(Iop_Or32,
2463                           binop(Iop_Shl32, mkexpr(cflagT), mkU8(31)),
2464                           binop(Iop_Shr32, mkexpr(rmT), mkU8(1)));
2465             DIS(buf, "[r%u, %cr%u, RRX]", rN, opChar, rM);
2466          } else {
2467             IRTemp rmT = newTemp(Ity_I32);
2468             assign(rmT, getIRegA(rM));
2469             vassert(imm5 >= 1 && imm5 <= 31);
2470             index = binop(Iop_Or32,
2471                           binop(Iop_Shl32, mkexpr(rmT), mkU8(32-imm5)),
2472                           binop(Iop_Shr32, mkexpr(rmT), mkU8(imm5)));
2473             DIS(buf, "[r%u, %cr%u, ROR #%u]", rN, opChar, rM, imm5);
2474          }
2475          break;
2476       default:
2477          vassert(0);
2478    }
2479    vassert(index);
2480    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2481                 getIRegA(rN), index);
2482 }
2483
2484
2485 /* ARM only */
2486 static
2487 IRExpr* mk_EA_reg_plusminus_imm8 ( UInt rN, UInt bU, UInt imm8,
2488                                    /*OUT*/HChar* buf )
2489 {
2490    vassert(rN < 16);
2491    vassert(bU < 2);
2492    vassert(imm8 < 0x100);
2493    HChar opChar = bU == 1 ? '+' : '-';
2494    DIS(buf, "[r%u, #%c%u]", rN, opChar, imm8);
2495    return
2496       binop( (bU == 1 ? Iop_Add32 : Iop_Sub32),
2497              getIRegA(rN),
2498              mkU32(imm8) );
2499 }
2500
2501
2502 /* ARM only */
2503 static
2504 IRExpr* mk_EA_reg_plusminus_reg ( UInt rN, UInt bU, UInt rM,
2505                                   /*OUT*/HChar* buf )
2506 {
2507    vassert(rN < 16);
2508    vassert(bU < 2);
2509    vassert(rM < 16);
2510    HChar   opChar = bU == 1 ? '+' : '-';
2511    IRExpr* index  = getIRegA(rM);
2512    DIS(buf, "[r%u, %c r%u]", rN, opChar, rM);
2513    return binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
2514                 getIRegA(rN), index);
2515 }
2516
2517
2518 /* irRes :: Ity_I32 holds a floating point comparison result encoded
2519    as an IRCmpF64Result.  Generate code to convert it to an
2520    ARM-encoded (N,Z,C,V) group in the lowest 4 bits of an I32 value.
2521    Assign a new temp to hold that value, and return the temp. */
2522 static
2523 IRTemp mk_convert_IRCmpF64Result_to_NZCV ( IRTemp irRes )
2524 {
2525    IRTemp ix       = newTemp(Ity_I32);
2526    IRTemp termL    = newTemp(Ity_I32);
2527    IRTemp termR    = newTemp(Ity_I32);
2528    IRTemp nzcv     = newTemp(Ity_I32);
2529
2530    /* This is where the fun starts.  We have to convert 'irRes' from
2531       an IR-convention return result (IRCmpF64Result) to an
2532       ARM-encoded (N,Z,C,V) group.  The final result is in the bottom
2533       4 bits of 'nzcv'. */
2534    /* Map compare result from IR to ARM(nzcv) */
2535    /*
2536       FP cmp result | IR   | ARM(nzcv)
2537       --------------------------------
2538       UN              0x45   0011
2539       LT              0x01   1000
2540       GT              0x00   0010
2541       EQ              0x40   0110
2542    */
2543    /* Now since you're probably wondering WTF ..
2544
2545       ix fishes the useful bits out of the IR value, bits 6 and 0, and
2546       places them side by side, giving a number which is 0, 1, 2 or 3.
2547
2548       termL is a sequence cooked up by GNU superopt.  It converts ix
2549          into an almost correct value NZCV value (incredibly), except
2550          for the case of UN, where it produces 0100 instead of the
2551          required 0011.
2552
2553       termR is therefore a correction term, also computed from ix.  It
2554          is 1 in the UN case and 0 for LT, GT and UN.  Hence, to get
2555          the final correct value, we subtract termR from termL.
2556
2557       Don't take my word for it.  There's a test program at the bottom
2558       of this file, to try this out with.
2559    */
2560    assign(
2561       ix,
2562       binop(Iop_Or32,
2563             binop(Iop_And32,
2564                   binop(Iop_Shr32, mkexpr(irRes), mkU8(5)),
2565                   mkU32(3)),
2566             binop(Iop_And32, mkexpr(irRes), mkU32(1))));
2567
2568    assign(
2569       termL,
2570       binop(Iop_Add32,
2571             binop(Iop_Shr32,
2572                   binop(Iop_Sub32,
2573                         binop(Iop_Shl32,
2574                               binop(Iop_Xor32, mkexpr(ix), mkU32(1)),
2575                               mkU8(30)),
2576                         mkU32(1)),
2577                   mkU8(29)),
2578             mkU32(1)));
2579
2580    assign(
2581       termR,
2582       binop(Iop_And32,
2583             binop(Iop_And32,
2584                   mkexpr(ix),
2585                   binop(Iop_Shr32, mkexpr(ix), mkU8(1))),
2586             mkU32(1)));
2587
2588    assign(nzcv, binop(Iop_Sub32, mkexpr(termL), mkexpr(termR)));
2589    return nzcv;
2590 }
2591
2592
2593 /* Thumb32 only.  This is "ThumbExpandImm" in the ARM ARM.  If
2594    updatesC is non-NULL, a boolean is written to it indicating whether
2595    or not the C flag is updated, as per ARM ARM "ThumbExpandImm_C".
2596 */
2597 static UInt thumbExpandImm ( Bool* updatesC,
2598                              UInt imm1, UInt imm3, UInt imm8 )
2599 {
2600    vassert(imm1 < (1<<1));
2601    vassert(imm3 < (1<<3));
2602    vassert(imm8 < (1<<8));
2603    UInt i_imm3_a = (imm1 << 4) | (imm3 << 1) | ((imm8 >> 7) & 1);
2604    UInt abcdefgh = imm8;
2605    UInt lbcdefgh = imm8 | 0x80;
2606    if (updatesC) {
2607       *updatesC = i_imm3_a >= 8;
2608    }
2609    switch (i_imm3_a) {
2610       case 0: case 1:
2611          return abcdefgh;
2612       case 2: case 3:
2613          return (abcdefgh << 16) | abcdefgh;
2614       case 4: case 5:
2615          return (abcdefgh << 24) | (abcdefgh << 8);
2616       case 6: case 7:
2617          return (abcdefgh << 24) | (abcdefgh << 16)
2618                 | (abcdefgh << 8) | abcdefgh;
2619       case 8 ... 31:
2620          return lbcdefgh << (32 - i_imm3_a);
2621       default:
2622          break;
2623    }
2624    /*NOTREACHED*/vassert(0);
2625 }
2626
2627
2628 /* Version of thumbExpandImm where we simply feed it the
2629    instruction halfwords (the lowest addressed one is I0). */
2630 static UInt thumbExpandImm_from_I0_I1 ( Bool* updatesC,
2631                                         UShort i0s, UShort i1s )
2632 {
2633    UInt i0    = (UInt)i0s;
2634    UInt i1    = (UInt)i1s;
2635    UInt imm1  = SLICE_UInt(i0,10,10);
2636    UInt imm3  = SLICE_UInt(i1,14,12);
2637    UInt imm8  = SLICE_UInt(i1,7,0);
2638    return thumbExpandImm(updatesC, imm1, imm3, imm8);
2639 }
2640
2641
2642 /* Thumb16 only.  Given the firstcond and mask fields from an IT
2643    instruction, compute the 32-bit ITSTATE value implied, as described
2644    in libvex_guest_arm.h.  This is not the ARM ARM representation.
2645    Also produce the t/e chars for the 2nd, 3rd, 4th insns, for
2646    disassembly printing.  Returns False if firstcond or mask
2647    denote something invalid.
2648
2649    The number and conditions for the instructions to be
2650    conditionalised depend on firstcond and mask:
2651
2652    mask      cond 1    cond 2      cond 3      cond 4
2653
2654    1000      fc[3:0]
2655    x100      fc[3:0]   fc[3:1]:x
2656    xy10      fc[3:0]   fc[3:1]:x   fc[3:1]:y
2657    xyz1      fc[3:0]   fc[3:1]:x   fc[3:1]:y   fc[3:1]:z
2658
2659    The condition fields are assembled in *itstate backwards (cond 4 at
2660    the top, cond 1 at the bottom).  Conditions are << 4'd and then
2661    ^0xE'd, and those fields that correspond to instructions in the IT
2662    block are tagged with a 1 bit.
2663 */
2664 static Bool compute_ITSTATE ( /*OUT*/UInt*  itstate,
2665                               /*OUT*/HChar* ch1,
2666                               /*OUT*/HChar* ch2,
2667                               /*OUT*/HChar* ch3,
2668                               UInt firstcond, UInt mask )
2669 {
2670    vassert(firstcond <= 0xF);
2671    vassert(mask <= 0xF);
2672    *itstate = 0;
2673    *ch1 = *ch2 = *ch3 = '.';
2674    if (mask == 0)
2675       return False; /* the logic below actually ensures this anyway,
2676                        but clearer to make it explicit. */
2677    if (firstcond == 0xF)
2678       return False; /* NV is not allowed */
2679    if (firstcond == 0xE && popcount32(mask) != 1)
2680       return False; /* if firstcond is AL then all the rest must be too */
2681
2682    UInt m3 = (mask >> 3) & 1;
2683    UInt m2 = (mask >> 2) & 1;
2684    UInt m1 = (mask >> 1) & 1;
2685    UInt m0 = (mask >> 0) & 1;
2686
2687    UInt fc = (firstcond << 4) | 1/*in-IT-block*/;
2688    UInt ni = (0xE/*AL*/ << 4) | 0/*not-in-IT-block*/;
2689
2690    if (m3 == 1 && (m2|m1|m0) == 0) {
2691       *itstate = (ni << 24) | (ni << 16) | (ni << 8) | fc;
2692       *itstate ^= 0xE0E0E0E0;
2693       return True;
2694    }
2695
2696    if (m2 == 1 && (m1|m0) == 0) {
2697       *itstate = (ni << 24) | (ni << 16) | (setbit32(fc, 4, m3) << 8) | fc;
2698       *itstate ^= 0xE0E0E0E0;
2699       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2700       return True;
2701    }
2702
2703    if (m1 == 1 && m0 == 0) {
2704       *itstate = (ni << 24)
2705                  | (setbit32(fc, 4, m2) << 16)
2706                  | (setbit32(fc, 4, m3) << 8) | fc;
2707       *itstate ^= 0xE0E0E0E0;
2708       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2709       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2710       return True;
2711    }
2712
2713    if (m0 == 1) {
2714       *itstate = (setbit32(fc, 4, m1) << 24)
2715                  | (setbit32(fc, 4, m2) << 16)
2716                  | (setbit32(fc, 4, m3) << 8) | fc;
2717       *itstate ^= 0xE0E0E0E0;
2718       *ch1 = m3 == (firstcond & 1) ? 't' : 'e';
2719       *ch2 = m2 == (firstcond & 1) ? 't' : 'e';
2720       *ch3 = m1 == (firstcond & 1) ? 't' : 'e';
2721       return True;
2722    }
2723
2724    return False;
2725 }
2726
2727
2728 /* Generate IR to do 32-bit bit reversal, a la Hacker's Delight
2729    Chapter 7 Section 1. */
2730 static IRTemp gen_BITREV ( IRTemp x0 )
2731 {
2732    IRTemp x1 = newTemp(Ity_I32);
2733    IRTemp x2 = newTemp(Ity_I32);
2734    IRTemp x3 = newTemp(Ity_I32);
2735    IRTemp x4 = newTemp(Ity_I32);
2736    IRTemp x5 = newTemp(Ity_I32);
2737    UInt   c1 = 0x55555555;
2738    UInt   c2 = 0x33333333;
2739    UInt   c3 = 0x0F0F0F0F;
2740    UInt   c4 = 0x00FF00FF;
2741    UInt   c5 = 0x0000FFFF;
2742    assign(x1,
2743           binop(Iop_Or32,
2744                 binop(Iop_Shl32,
2745                       binop(Iop_And32, mkexpr(x0), mkU32(c1)),
2746                       mkU8(1)),
2747                 binop(Iop_Shr32,
2748                       binop(Iop_And32, mkexpr(x0), mkU32(~c1)),
2749                       mkU8(1))
2750    ));
2751    assign(x2,
2752           binop(Iop_Or32,
2753                 binop(Iop_Shl32,
2754                       binop(Iop_And32, mkexpr(x1), mkU32(c2)),
2755                       mkU8(2)),
2756                 binop(Iop_Shr32,
2757                       binop(Iop_And32, mkexpr(x1), mkU32(~c2)),
2758                       mkU8(2))
2759    ));
2760    assign(x3,
2761           binop(Iop_Or32,
2762                 binop(Iop_Shl32,
2763                       binop(Iop_And32, mkexpr(x2), mkU32(c3)),
2764                       mkU8(4)),
2765                 binop(Iop_Shr32,
2766                       binop(Iop_And32, mkexpr(x2), mkU32(~c3)),
2767                       mkU8(4))
2768    ));
2769    assign(x4,
2770           binop(Iop_Or32,
2771                 binop(Iop_Shl32,
2772                       binop(Iop_And32, mkexpr(x3), mkU32(c4)),
2773                       mkU8(8)),
2774                 binop(Iop_Shr32,
2775                       binop(Iop_And32, mkexpr(x3), mkU32(~c4)),
2776                       mkU8(8))
2777    ));
2778    assign(x5,
2779           binop(Iop_Or32,
2780                 binop(Iop_Shl32,
2781                       binop(Iop_And32, mkexpr(x4), mkU32(c5)),
2782                       mkU8(16)),
2783                 binop(Iop_Shr32,
2784                       binop(Iop_And32, mkexpr(x4), mkU32(~c5)),
2785                       mkU8(16))
2786    ));
2787    return x5;
2788 }
2789
2790
2791 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2792    0:1:2:3 (aka byte-swap). */
2793 static IRTemp gen_REV ( IRTemp arg )
2794 {
2795    IRTemp res = newTemp(Ity_I32);
2796    assign(res,
2797           binop(Iop_Or32,
2798                 binop(Iop_Shl32, mkexpr(arg), mkU8(24)),
2799           binop(Iop_Or32,
2800                 binop(Iop_And32, binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2801                                  mkU32(0x00FF0000)),
2802           binop(Iop_Or32,
2803                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2804                                        mkU32(0x0000FF00)),
2805                 binop(Iop_And32, binop(Iop_Shr32, mkexpr(arg), mkU8(24)),
2806                                        mkU32(0x000000FF) )
2807    ))));
2808    return res;
2809 }
2810
2811
2812 /* Generate IR to do rearrange bytes 3:2:1:0 in a word in to the order
2813    2:3:0:1 (swap within lo and hi halves). */
2814 static IRTemp gen_REV16 ( IRTemp arg )
2815 {
2816    IRTemp res = newTemp(Ity_I32);
2817    assign(res,
2818           binop(Iop_Or32,
2819                 binop(Iop_And32,
2820                       binop(Iop_Shl32, mkexpr(arg), mkU8(8)),
2821                       mkU32(0xFF00FF00)),
2822                 binop(Iop_And32,
2823                       binop(Iop_Shr32, mkexpr(arg), mkU8(8)),
2824                       mkU32(0x00FF00FF))));
2825    return res;
2826 }
2827
2828
2829 /*------------------------------------------------------------*/
2830 /*--- Advanced SIMD (NEON) instructions                    ---*/
2831 /*------------------------------------------------------------*/
2832
2833 /*------------------------------------------------------------*/
2834 /*--- NEON data processing                                 ---*/
2835 /*------------------------------------------------------------*/
2836
2837 /* For all NEON DP ops, we use the normal scheme to handle conditional
2838    writes to registers -- pass in condT and hand that on to the
2839    put*Reg functions.  In ARM mode condT is always IRTemp_INVALID
2840    since NEON is unconditional for ARM.  In Thumb mode condT is
2841    derived from the ITSTATE shift register in the normal way. */
2842
2843 static
2844 UInt get_neon_d_regno(UInt theInstr)
2845 {
2846    UInt x = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
2847    if (theInstr & 0x40) {
2848       if (x & 1) {
2849          x = x + 0x100;
2850       } else {
2851          x = x >> 1;
2852       }
2853    }
2854    return x;
2855 }
2856
2857 static
2858 UInt get_neon_n_regno(UInt theInstr)
2859 {
2860    UInt x = ((theInstr >> 3) & 0x10) | ((theInstr >> 16) & 0xF);
2861    if (theInstr & 0x40) {
2862       if (x & 1) {
2863          x = x + 0x100;
2864       } else {
2865          x = x >> 1;
2866       }
2867    }
2868    return x;
2869 }
2870
2871 static
2872 UInt get_neon_m_regno(UInt theInstr)
2873 {
2874    UInt x = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
2875    if (theInstr & 0x40) {
2876       if (x & 1) {
2877          x = x + 0x100;
2878       } else {
2879          x = x >> 1;
2880       }
2881    }
2882    return x;
2883 }
2884
2885 static
2886 Bool dis_neon_vext ( UInt theInstr, IRTemp condT )
2887 {
2888    UInt dreg = get_neon_d_regno(theInstr);
2889    UInt mreg = get_neon_m_regno(theInstr);
2890    UInt nreg = get_neon_n_regno(theInstr);
2891    UInt imm4 = (theInstr >> 8) & 0xf;
2892    UInt Q = (theInstr >> 6) & 1;
2893    HChar reg_t = Q ? 'q' : 'd';
2894
2895    if (Q) {
2896       putQReg(dreg, triop(Iop_SliceV128, /*hiV128*/getQReg(mreg),
2897                           /*loV128*/getQReg(nreg), mkU8(imm4)), condT);
2898    } else {
2899       putDRegI64(dreg, triop(Iop_Slice64, /*hiI64*/getDRegI64(mreg),
2900                              /*loI64*/getDRegI64(nreg), mkU8(imm4)), condT);
2901    }
2902    DIP("vext.8 %c%u, %c%u, %c%u, #%u\n", reg_t, dreg, reg_t, nreg,
2903                                          reg_t, mreg, imm4);
2904    return True;
2905 }
2906
2907 /* Generate specific vector FP binary ops, possibly with a fake
2908    rounding mode as required by the primop. */
2909 static
2910 IRExpr* binop_w_fake_RM ( IROp op, IRExpr* argL, IRExpr* argR )
2911 {
2912    switch (op) {
2913       case Iop_Add32Fx4:
2914       case Iop_Sub32Fx4:
2915       case Iop_Mul32Fx4:
2916          return triop(op, get_FAKE_roundingmode(), argL, argR );
2917       case Iop_Add32x4: case Iop_Add16x8:
2918       case Iop_Sub32x4: case Iop_Sub16x8:
2919       case Iop_Mul32x4: case Iop_Mul16x8:
2920       case Iop_Mul32x2: case Iop_Mul16x4:
2921       case Iop_Add32Fx2:
2922       case Iop_Sub32Fx2:
2923       case Iop_Mul32Fx2:
2924       case Iop_PwAdd32Fx2:
2925          return binop(op, argL, argR);
2926       default:
2927         ppIROp(op);
2928         vassert(0);
2929    }
2930 }
2931
2932 /* VTBL, VTBX */
2933 static
2934 Bool dis_neon_vtb ( UInt theInstr, IRTemp condT )
2935 {
2936    UInt op = (theInstr >> 6) & 1;
2937    UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
2938    UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
2939    UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
2940    UInt len = (theInstr >> 8) & 3;
2941    Int i;
2942    IROp cmp;
2943    ULong imm;
2944    IRTemp arg_l;
2945    IRTemp old_mask, new_mask, cur_mask;
2946    IRTemp old_res, new_res;
2947    IRTemp old_arg, new_arg;
2948
2949    if (dreg >= 0x100 || mreg >= 0x100 || nreg >= 0x100)
2950       return False;
2951    if (nreg + len > 31)
2952       return False;
2953
2954    cmp = Iop_CmpGT8Ux8;
2955
2956    old_mask = newTemp(Ity_I64);
2957    old_res = newTemp(Ity_I64);
2958    old_arg = newTemp(Ity_I64);
2959    assign(old_mask, mkU64(0));
2960    assign(old_res, mkU64(0));
2961    assign(old_arg, getDRegI64(mreg));
2962    imm = 8;
2963    imm = (imm <<  8) | imm;
2964    imm = (imm << 16) | imm;
2965    imm = (imm << 32) | imm;
2966
2967    for (i = 0; i <= len; i++) {
2968       arg_l = newTemp(Ity_I64);
2969       new_mask = newTemp(Ity_I64);
2970       cur_mask = newTemp(Ity_I64);
2971       new_res = newTemp(Ity_I64);
2972       new_arg = newTemp(Ity_I64);
2973       assign(arg_l, getDRegI64(nreg+i));
2974       assign(new_arg, binop(Iop_Sub8x8, mkexpr(old_arg), mkU64(imm)));
2975       assign(cur_mask, binop(cmp, mkU64(imm), mkexpr(old_arg)));
2976       assign(new_mask, binop(Iop_Or64, mkexpr(old_mask), mkexpr(cur_mask)));
2977       assign(new_res, binop(Iop_Or64,
2978                             mkexpr(old_res),
2979                             binop(Iop_And64,
2980                                   binop(Iop_Perm8x8,
2981                                         mkexpr(arg_l),
2982                                         binop(Iop_And64,
2983                                               mkexpr(old_arg),
2984                                               mkexpr(cur_mask))),
2985                                   mkexpr(cur_mask))));
2986
2987       old_arg = new_arg;
2988       old_mask = new_mask;
2989       old_res = new_res;
2990    }
2991    if (op) {
2992       new_res = newTemp(Ity_I64);
2993       assign(new_res, binop(Iop_Or64,
2994                             binop(Iop_And64,
2995                                   getDRegI64(dreg),
2996                                   unop(Iop_Not64, mkexpr(old_mask))),
2997                             mkexpr(old_res)));
2998       old_res = new_res;
2999    }
3000
3001    putDRegI64(dreg, mkexpr(old_res), condT);
3002    DIP("vtb%c.8 d%u, {", op ? 'x' : 'l', dreg);
3003    if (len > 0) {
3004       DIP("d%u-d%u", nreg, nreg + len);
3005    } else {
3006       DIP("d%u", nreg);
3007    }
3008    DIP("}, d%u\n", mreg);
3009    return True;
3010 }
3011
3012 /* VDUP (scalar)  */
3013 static
3014 Bool dis_neon_vdup ( UInt theInstr, IRTemp condT )
3015 {
3016    UInt Q = (theInstr >> 6) & 1;
3017    UInt dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
3018    UInt mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
3019    UInt imm4 = (theInstr >> 16) & 0xF;
3020    UInt index;
3021    UInt size;
3022    IRTemp arg_m;
3023    IRTemp res;
3024    IROp op, op2;
3025
3026    if ((imm4 == 0) || (imm4 == 8))
3027       return False;
3028    if ((Q == 1) && ((dreg & 1) == 1))
3029       return False;
3030    if (Q)
3031       dreg >>= 1;
3032    arg_m = newTemp(Ity_I64);
3033    assign(arg_m, getDRegI64(mreg));
3034    if (Q)
3035       res = newTemp(Ity_V128);
3036    else
3037       res = newTemp(Ity_I64);
3038    if ((imm4 & 1) == 1) {
3039       op = Q ? Iop_Dup8x16 : Iop_Dup8x8;
3040       op2 = Iop_GetElem8x8;
3041       index = imm4 >> 1;
3042       size = 8;
3043    } else if ((imm4 & 3) == 2) {
3044       op = Q ? Iop_Dup16x8 : Iop_Dup16x4;
3045       op2 = Iop_GetElem16x4;
3046       index = imm4 >> 2;
3047       size = 16;
3048    } else if ((imm4 & 7) == 4) {
3049       op = Q ? Iop_Dup32x4 : Iop_Dup32x2;
3050       op2 = Iop_GetElem32x2;
3051       index = imm4 >> 3;
3052       size = 32;
3053    } else {
3054       return False; // can this ever happen?
3055    }
3056    assign(res, unop(op, binop(op2, mkexpr(arg_m), mkU8(index))));
3057    if (Q) {
3058       putQReg(dreg, mkexpr(res), condT);
3059    } else {
3060       putDRegI64(dreg, mkexpr(res), condT);
3061    }
3062    DIP("vdup.%u %c%u, d%u[%u]\n", size, Q ? 'q' : 'd', dreg, mreg, index);
3063    return True;
3064 }
3065
3066 /* A7.4.1 Three registers of the same length */
3067 static
3068 Bool dis_neon_data_3same ( UInt theInstr, IRTemp condT )
3069 {
3070    /* In paths where this returns False, indicating a non-decodable
3071       instruction, there may still be some IR assignments to temporaries
3072       generated.  This is inconvenient but harmless, and the post-front-end
3073       IR optimisation pass will just remove them anyway.  So there's no
3074       effort made here to tidy it up.
3075    */
3076    UInt Q = (theInstr >> 6) & 1;
3077    UInt dreg = get_neon_d_regno(theInstr);
3078    UInt nreg = get_neon_n_regno(theInstr);
3079    UInt mreg = get_neon_m_regno(theInstr);
3080    UInt A = (theInstr >> 8) & 0xF;
3081    UInt B = (theInstr >> 4) & 1;
3082    UInt C = (theInstr >> 20) & 0x3;
3083    UInt U = (theInstr >> 24) & 1;
3084    UInt size = C;
3085
3086    IRTemp arg_n;
3087    IRTemp arg_m;
3088    IRTemp res;
3089
3090    if (Q) {
3091       arg_n = newTemp(Ity_V128);
3092       arg_m = newTemp(Ity_V128);
3093       res = newTemp(Ity_V128);
3094       assign(arg_n, getQReg(nreg));
3095       assign(arg_m, getQReg(mreg));
3096    } else {
3097       arg_n = newTemp(Ity_I64);
3098       arg_m = newTemp(Ity_I64);
3099       res = newTemp(Ity_I64);
3100       assign(arg_n, getDRegI64(nreg));
3101       assign(arg_m, getDRegI64(mreg));
3102    }
3103
3104    switch(A) {
3105       case 0:
3106          if (B == 0) {
3107             /* VHADD */
3108             ULong imm = 0;
3109             IRExpr *imm_val;
3110             IROp addOp;
3111             IROp andOp;
3112             IROp shOp;
3113             HChar regType = Q ? 'q' : 'd';
3114
3115             if (size == 3)
3116                return False;
3117             switch(size) {
3118                case 0: imm = 0x101010101010101LL; break;
3119                case 1: imm = 0x1000100010001LL; break;
3120                case 2: imm = 0x100000001LL; break;
3121                default: vassert(0);
3122             }
3123             if (Q) {
3124                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3125                andOp = Iop_AndV128;
3126             } else {
3127                imm_val = mkU64(imm);
3128                andOp = Iop_And64;
3129             }
3130             if (U) {
3131                switch(size) {
3132                   case 0:
3133                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3134                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3135                      break;
3136                   case 1:
3137                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3138                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3139                      break;
3140                   case 2:
3141                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3142                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3143                      break;
3144                   default:
3145                      vassert(0);
3146                }
3147             } else {
3148                switch(size) {
3149                   case 0:
3150                      addOp = Q ? Iop_Add8x16 : Iop_Add8x8;
3151                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3152                      break;
3153                   case 1:
3154                      addOp = Q ? Iop_Add16x8 : Iop_Add16x4;
3155                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3156                      break;
3157                   case 2:
3158                      addOp = Q ? Iop_Add32x4 : Iop_Add32x2;
3159                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3160                      break;
3161                   default:
3162                      vassert(0);
3163                }
3164             }
3165             assign(res,
3166                    binop(addOp,
3167                          binop(addOp,
3168                                binop(shOp, mkexpr(arg_m), mkU8(1)),
3169                                binop(shOp, mkexpr(arg_n), mkU8(1))),
3170                          binop(shOp,
3171                                binop(addOp,
3172                                      binop(andOp, mkexpr(arg_m), imm_val),
3173                                      binop(andOp, mkexpr(arg_n), imm_val)),
3174                                mkU8(1))));
3175             DIP("vhadd.%c%d %c%u, %c%u, %c%u\n",
3176                 U ? 'u' : 's', 8 << size, regType,
3177                 dreg, regType, nreg, regType, mreg);
3178          } else {
3179             /* VQADD */
3180             IROp op, op2;
3181             IRTemp tmp;
3182             HChar reg_t = Q ? 'q' : 'd';
3183             if (Q) {
3184                switch (size) {
3185                   case 0:
3186                      op = U ? Iop_QAdd8Ux16 : Iop_QAdd8Sx16;
3187                      op2 = Iop_Add8x16;
3188                      break;
3189                   case 1:
3190                      op = U ? Iop_QAdd16Ux8 : Iop_QAdd16Sx8;
3191                      op2 = Iop_Add16x8;
3192                      break;
3193                   case 2:
3194                      op = U ? Iop_QAdd32Ux4 : Iop_QAdd32Sx4;
3195                      op2 = Iop_Add32x4;
3196                      break;
3197                   case 3:
3198                      op = U ? Iop_QAdd64Ux2 : Iop_QAdd64Sx2;
3199                      op2 = Iop_Add64x2;
3200                      break;
3201                   default:
3202                      vassert(0);
3203                }
3204             } else {
3205                switch (size) {
3206                   case 0:
3207                      op = U ? Iop_QAdd8Ux8 : Iop_QAdd8Sx8;
3208                      op2 = Iop_Add8x8;
3209                      break;
3210                   case 1:
3211                      op = U ? Iop_QAdd16Ux4 : Iop_QAdd16Sx4;
3212                      op2 = Iop_Add16x4;
3213                      break;
3214                   case 2:
3215                      op = U ? Iop_QAdd32Ux2 : Iop_QAdd32Sx2;
3216                      op2 = Iop_Add32x2;
3217                      break;
3218                   case 3:
3219                      op = U ? Iop_QAdd64Ux1 : Iop_QAdd64Sx1;
3220                      op2 = Iop_Add64;
3221                      break;
3222                   default:
3223                      vassert(0);
3224                }
3225             }
3226             if (Q) {
3227                tmp = newTemp(Ity_V128);
3228             } else {
3229                tmp = newTemp(Ity_I64);
3230             }
3231             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3232             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3233             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3234             DIP("vqadd.%c%d %c%u %c%u, %c%u\n",
3235                 U ? 'u' : 's',
3236                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3237          }
3238          break;
3239       case 1:
3240          if (B == 0) {
3241             /* VRHADD */
3242             /* VRHADD C, A, B ::=
3243                  C = (A >> 1) + (B >> 1) + (((A & 1) + (B & 1) + 1) >> 1) */
3244             IROp shift_op, add_op;
3245             IRTemp cc;
3246             ULong one = 1;
3247             HChar reg_t = Q ? 'q' : 'd';
3248             switch (size) {
3249                case 0: one = (one <<  8) | one; /* fall through */
3250                case 1: one = (one << 16) | one; /* fall through */
3251                case 2: one = (one << 32) | one; break;
3252                case 3: return False;
3253                default: vassert(0);
3254             }
3255             if (Q) {
3256                switch (size) {
3257                   case 0:
3258                      shift_op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
3259                      add_op = Iop_Add8x16;
3260                      break;
3261                   case 1:
3262                      shift_op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
3263                      add_op = Iop_Add16x8;
3264                      break;
3265                   case 2:
3266                      shift_op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
3267                      add_op = Iop_Add32x4;
3268                      break;
3269                   case 3:
3270                      return False;
3271                   default:
3272                      vassert(0);
3273                }
3274             } else {
3275                switch (size) {
3276                   case 0:
3277                      shift_op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
3278                      add_op = Iop_Add8x8;
3279                      break;
3280                   case 1:
3281                      shift_op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
3282                      add_op = Iop_Add16x4;
3283                      break;
3284                   case 2:
3285                      shift_op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
3286                      add_op = Iop_Add32x2;
3287                      break;
3288                   case 3:
3289                      return False;
3290                   default:
3291                      vassert(0);
3292                }
3293             }
3294             if (Q) {
3295                cc = newTemp(Ity_V128);
3296                assign(cc, binop(shift_op,
3297                                 binop(add_op,
3298                                       binop(add_op,
3299                                             binop(Iop_AndV128,
3300                                                   mkexpr(arg_n),
3301                                                   binop(Iop_64HLtoV128,
3302                                                         mkU64(one),
3303                                                         mkU64(one))),
3304                                             binop(Iop_AndV128,
3305                                                   mkexpr(arg_m),
3306                                                   binop(Iop_64HLtoV128,
3307                                                         mkU64(one),
3308                                                         mkU64(one)))),
3309                                       binop(Iop_64HLtoV128,
3310                                             mkU64(one),
3311                                             mkU64(one))),
3312                                 mkU8(1)));
3313                assign(res, binop(add_op,
3314                                  binop(add_op,
3315                                        binop(shift_op,
3316                                              mkexpr(arg_n),
3317                                              mkU8(1)),
3318                                        binop(shift_op,
3319                                              mkexpr(arg_m),
3320                                              mkU8(1))),
3321                                  mkexpr(cc)));
3322             } else {
3323                cc = newTemp(Ity_I64);
3324                assign(cc, binop(shift_op,
3325                                 binop(add_op,
3326                                       binop(add_op,
3327                                             binop(Iop_And64,
3328                                                   mkexpr(arg_n),
3329                                                   mkU64(one)),
3330                                             binop(Iop_And64,
3331                                                   mkexpr(arg_m),
3332                                                   mkU64(one))),
3333                                       mkU64(one)),
3334                                 mkU8(1)));
3335                assign(res, binop(add_op,
3336                                  binop(add_op,
3337                                        binop(shift_op,
3338                                              mkexpr(arg_n),
3339                                              mkU8(1)),
3340                                        binop(shift_op,
3341                                              mkexpr(arg_m),
3342                                              mkU8(1))),
3343                                  mkexpr(cc)));
3344             }
3345             DIP("vrhadd.%c%d %c%u, %c%u, %c%u\n",
3346                 U ? 'u' : 's',
3347                 8 << size, reg_t, dreg, reg_t, nreg, reg_t, mreg);
3348          } else {
3349             if (U == 0)  {
3350                switch(C) {
3351                   case 0: {
3352                      /* VAND  */
3353                      HChar reg_t = Q ? 'q' : 'd';
3354                      if (Q) {
3355                         assign(res, binop(Iop_AndV128, mkexpr(arg_n),
3356                                                        mkexpr(arg_m)));
3357                      } else {
3358                         assign(res, binop(Iop_And64, mkexpr(arg_n),
3359                                                      mkexpr(arg_m)));
3360                      }
3361                      DIP("vand %c%u, %c%u, %c%u\n",
3362                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
3363                      break;
3364                   }
3365                   case 1: {
3366                      /* VBIC  */
3367                      HChar reg_t = Q ? 'q' : 'd';
3368                      if (Q) {
3369                         assign(res, binop(Iop_AndV128,mkexpr(arg_n),
3370                                unop(Iop_NotV128, mkexpr(arg_m))));
3371                      } else {
3372                         assign(res, binop(Iop_And64, mkexpr(arg_n),
3373                                unop(Iop_Not64, mkexpr(arg_m))));
3374                      }
3375                      DIP("vbic %c%u, %c%u, %c%u\n",
3376                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
3377                      break;
3378                   }
3379                   case 2:
3380                      if ( nreg != mreg) {
3381                         /* VORR  */
3382                         HChar reg_t = Q ? 'q' : 'd';
3383                         if (Q) {
3384                            assign(res, binop(Iop_OrV128, mkexpr(arg_n),
3385                                                          mkexpr(arg_m)));
3386                         } else {
3387                            assign(res, binop(Iop_Or64, mkexpr(arg_n),
3388                                                        mkexpr(arg_m)));
3389                         }
3390                         DIP("vorr %c%u, %c%u, %c%u\n",
3391                             reg_t, dreg, reg_t, nreg, reg_t, mreg);
3392                      } else {
3393                         /* VMOV  */
3394                         HChar reg_t = Q ? 'q' : 'd';
3395                         assign(res, mkexpr(arg_m));
3396                         DIP("vmov %c%u, %c%u\n", reg_t, dreg, reg_t, mreg);
3397                      }
3398                      break;
3399                   case 3:{
3400                      /* VORN  */
3401                      HChar reg_t = Q ? 'q' : 'd';
3402                      if (Q) {
3403                         assign(res, binop(Iop_OrV128,mkexpr(arg_n),
3404                                unop(Iop_NotV128, mkexpr(arg_m))));
3405                      } else {
3406                         assign(res, binop(Iop_Or64, mkexpr(arg_n),
3407                                unop(Iop_Not64, mkexpr(arg_m))));
3408                      }
3409                      DIP("vorn %c%u, %c%u, %c%u\n",
3410                          reg_t, dreg, reg_t, nreg, reg_t, mreg);
3411                      break;
3412                   }
3413                   default:
3414                      vassert(0);
3415                }
3416             } else {
3417                switch(C) {
3418                   case 0:
3419                      /* VEOR (XOR)  */
3420                      if (Q) {
3421                         assign(res, binop(Iop_XorV128, mkexpr(arg_n),
3422                                                        mkexpr(arg_m)));
3423                      } else {
3424                         assign(res, binop(Iop_Xor64, mkexpr(arg_n),
3425                                                      mkexpr(arg_m)));
3426                      }
3427                      DIP("veor %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
3428                            Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3429                      break;
3430                   case 1:
3431                      /* VBSL  */
3432                      if (Q) {
3433                         IRTemp reg_d = newTemp(Ity_V128);
3434                         assign(reg_d, getQReg(dreg));
3435                         assign(res,
3436                                binop(Iop_OrV128,
3437                                      binop(Iop_AndV128, mkexpr(arg_n),
3438                                                         mkexpr(reg_d)),
3439                                      binop(Iop_AndV128,
3440                                            mkexpr(arg_m),
3441                                            unop(Iop_NotV128,
3442                                                  mkexpr(reg_d)) ) ) );
3443                      } else {
3444                         IRTemp reg_d = newTemp(Ity_I64);
3445                         assign(reg_d, getDRegI64(dreg));
3446                         assign(res,
3447                                binop(Iop_Or64,
3448                                      binop(Iop_And64, mkexpr(arg_n),
3449                                                       mkexpr(reg_d)),
3450                                      binop(Iop_And64,
3451                                            mkexpr(arg_m),
3452                                            unop(Iop_Not64, mkexpr(reg_d)))));
3453                      }
3454                      DIP("vbsl %c%u, %c%u, %c%u\n",
3455                          Q ? 'q' : 'd', dreg,
3456                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3457                      break;
3458                   case 2:
3459                      /* VBIT  */
3460                      if (Q) {
3461                         IRTemp reg_d = newTemp(Ity_V128);
3462                         assign(reg_d, getQReg(dreg));
3463                         assign(res,
3464                                binop(Iop_OrV128,
3465                                      binop(Iop_AndV128, mkexpr(arg_n),
3466                                                         mkexpr(arg_m)),
3467                                      binop(Iop_AndV128,
3468                                            mkexpr(reg_d),
3469                                            unop(Iop_NotV128, mkexpr(arg_m)))));
3470                      } else {
3471                         IRTemp reg_d = newTemp(Ity_I64);
3472                         assign(reg_d, getDRegI64(dreg));
3473                         assign(res,
3474                                binop(Iop_Or64,
3475                                      binop(Iop_And64, mkexpr(arg_n),
3476                                                       mkexpr(arg_m)),
3477                                      binop(Iop_And64,
3478                                            mkexpr(reg_d),
3479                                            unop(Iop_Not64, mkexpr(arg_m)))));
3480                      }
3481                      DIP("vbit %c%u, %c%u, %c%u\n",
3482                          Q ? 'q' : 'd', dreg,
3483                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3484                      break;
3485                   case 3:
3486                      /* VBIF  */
3487                      if (Q) {
3488                         IRTemp reg_d = newTemp(Ity_V128);
3489                         assign(reg_d, getQReg(dreg));
3490                         assign(res,
3491                                binop(Iop_OrV128,
3492                                      binop(Iop_AndV128, mkexpr(reg_d),
3493                                                         mkexpr(arg_m)),
3494                                      binop(Iop_AndV128,
3495                                            mkexpr(arg_n),
3496                                            unop(Iop_NotV128, mkexpr(arg_m)))));
3497                      } else {
3498                         IRTemp reg_d = newTemp(Ity_I64);
3499                         assign(reg_d, getDRegI64(dreg));
3500                         assign(res,
3501                                binop(Iop_Or64,
3502                                      binop(Iop_And64, mkexpr(reg_d),
3503                                                       mkexpr(arg_m)),
3504                                      binop(Iop_And64,
3505                                            mkexpr(arg_n),
3506                                            unop(Iop_Not64, mkexpr(arg_m)))));
3507                      }
3508                      DIP("vbif %c%u, %c%u, %c%u\n",
3509                          Q ? 'q' : 'd', dreg,
3510                          Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
3511                      break;
3512                   default:
3513                      vassert(0);
3514                }
3515             }
3516          }
3517          break;
3518       case 2:
3519          if (B == 0) {
3520             /* VHSUB */
3521             /* (A >> 1) - (B >> 1) - (NOT (A) & B & 1)   */
3522             ULong imm = 0;
3523             IRExpr *imm_val;
3524             IROp subOp;
3525             IROp notOp;
3526             IROp andOp;
3527             IROp shOp;
3528             if (size == 3)
3529                return False;
3530             switch(size) {
3531                case 0: imm = 0x101010101010101LL; break;
3532                case 1: imm = 0x1000100010001LL; break;
3533                case 2: imm = 0x100000001LL; break;
3534                default: vassert(0);
3535             }
3536             if (Q) {
3537                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
3538                andOp = Iop_AndV128;
3539                notOp = Iop_NotV128;
3540             } else {
3541                imm_val = mkU64(imm);
3542                andOp = Iop_And64;
3543                notOp = Iop_Not64;
3544             }
3545             if (U) {
3546                switch(size) {
3547                   case 0:
3548                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3549                      shOp = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3550                      break;
3551                   case 1:
3552                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3553                      shOp = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3554                      break;
3555                   case 2:
3556                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3557                      shOp = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3558                      break;
3559                   default:
3560                      vassert(0);
3561                }
3562             } else {
3563                switch(size) {
3564                   case 0:
3565                      subOp = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3566                      shOp = Q ? Iop_SarN8x16 : Iop_SarN8x8;
3567                      break;
3568                   case 1:
3569                      subOp = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3570                      shOp = Q ? Iop_SarN16x8 : Iop_SarN16x4;
3571                      break;
3572                   case 2:
3573                      subOp = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3574                      shOp = Q ? Iop_SarN32x4 : Iop_SarN32x2;
3575                      break;
3576                   default:
3577                      vassert(0);
3578                }
3579             }
3580             assign(res,
3581                    binop(subOp,
3582                          binop(subOp,
3583                                binop(shOp, mkexpr(arg_n), mkU8(1)),
3584                                binop(shOp, mkexpr(arg_m), mkU8(1))),
3585                          binop(andOp,
3586                                binop(andOp,
3587                                      unop(notOp, mkexpr(arg_n)),
3588                                      mkexpr(arg_m)),
3589                                imm_val)));
3590             DIP("vhsub.%c%d %c%u, %c%u, %c%u\n",
3591                 U ? 'u' : 's', 8 << size,
3592                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3593                 mreg);
3594          } else {
3595             /* VQSUB */
3596             IROp op, op2;
3597             IRTemp tmp;
3598             if (Q) {
3599                switch (size) {
3600                   case 0:
3601                      op = U ? Iop_QSub8Ux16 : Iop_QSub8Sx16;
3602                      op2 = Iop_Sub8x16;
3603                      break;
3604                   case 1:
3605                      op = U ? Iop_QSub16Ux8 : Iop_QSub16Sx8;
3606                      op2 = Iop_Sub16x8;
3607                      break;
3608                   case 2:
3609                      op = U ? Iop_QSub32Ux4 : Iop_QSub32Sx4;
3610                      op2 = Iop_Sub32x4;
3611                      break;
3612                   case 3:
3613                      op = U ? Iop_QSub64Ux2 : Iop_QSub64Sx2;
3614                      op2 = Iop_Sub64x2;
3615                      break;
3616                   default:
3617                      vassert(0);
3618                }
3619             } else {
3620                switch (size) {
3621                   case 0:
3622                      op = U ? Iop_QSub8Ux8 : Iop_QSub8Sx8;
3623                      op2 = Iop_Sub8x8;
3624                      break;
3625                   case 1:
3626                      op = U ? Iop_QSub16Ux4 : Iop_QSub16Sx4;
3627                      op2 = Iop_Sub16x4;
3628                      break;
3629                   case 2:
3630                      op = U ? Iop_QSub32Ux2 : Iop_QSub32Sx2;
3631                      op2 = Iop_Sub32x2;
3632                      break;
3633                   case 3:
3634                      op = U ? Iop_QSub64Ux1 : Iop_QSub64Sx1;
3635                      op2 = Iop_Sub64;
3636                      break;
3637                   default:
3638                      vassert(0);
3639                }
3640             }
3641             if (Q)
3642                tmp = newTemp(Ity_V128);
3643             else
3644                tmp = newTemp(Ity_I64);
3645             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3646             assign(tmp, binop(op2, mkexpr(arg_n), mkexpr(arg_m)));
3647             setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
3648             DIP("vqsub.%c%d %c%u, %c%u, %c%u\n",
3649                 U ? 'u' : 's', 8 << size,
3650                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3651                 mreg);
3652          }
3653          break;
3654       case 3: {
3655             IROp op;
3656             if (Q) {
3657                switch (size) {
3658                   case 0: op = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16; break;
3659                   case 1: op = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8; break;
3660                   case 2: op = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4; break;
3661                   case 3: return False;
3662                   default: vassert(0);
3663                }
3664             } else {
3665                switch (size) {
3666                   case 0: op = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8; break;
3667                   case 1: op = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4; break;
3668                   case 2: op = U ? Iop_CmpGT32Ux2: Iop_CmpGT32Sx2; break;
3669                   case 3: return False;
3670                   default: vassert(0);
3671                }
3672             }
3673             if (B == 0) {
3674                /* VCGT  */
3675                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
3676                DIP("vcgt.%c%d %c%u, %c%u, %c%u\n",
3677                    U ? 'u' : 's', 8 << size,
3678                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3679                    mreg);
3680             } else {
3681                /* VCGE  */
3682                /* VCGE res, argn, argm
3683                     is equal to
3684                   VCGT tmp, argm, argn
3685                   VNOT res, tmp */
3686                assign(res,
3687                       unop(Q ? Iop_NotV128 : Iop_Not64,
3688                            binop(op, mkexpr(arg_m), mkexpr(arg_n))));
3689                DIP("vcge.%c%d %c%u, %c%u, %c%u\n",
3690                    U ? 'u' : 's', 8 << size,
3691                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
3692                    mreg);
3693             }
3694          }
3695          break;
3696       case 4:
3697          if (B == 0) {
3698             /* VSHL */
3699             IROp op = Iop_INVALID, sub_op = Iop_INVALID;
3700             IRTemp tmp = IRTemp_INVALID;
3701             if (U) {
3702                switch (size) {
3703                   case 0: op = Q ? Iop_Shl8x16 : Iop_Shl8x8; break;
3704                   case 1: op = Q ? Iop_Shl16x8 : Iop_Shl16x4; break;
3705                   case 2: op = Q ? Iop_Shl32x4 : Iop_Shl32x2; break;
3706                   case 3: op = Q ? Iop_Shl64x2 : Iop_Shl64; break;
3707                   default: vassert(0);
3708                }
3709             } else {
3710                tmp = newTemp(Q ? Ity_V128 : Ity_I64);
3711                switch (size) {
3712                   case 0:
3713                      op = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3714                      sub_op = Q ? Iop_Sub8x16 : Iop_Sub8x8;
3715                      break;
3716                   case 1:
3717                      op = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3718                      sub_op = Q ? Iop_Sub16x8 : Iop_Sub16x4;
3719                      break;
3720                   case 2:
3721                      op = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3722                      sub_op = Q ? Iop_Sub32x4 : Iop_Sub32x2;
3723                      break;
3724                   case 3:
3725                      op = Q ? Iop_Sar64x2 : Iop_Sar64;
3726                      sub_op = Q ? Iop_Sub64x2 : Iop_Sub64;
3727                      break;
3728                   default:
3729                      vassert(0);
3730                }
3731             }
3732             if (U) {
3733                if (!Q && (size == 3))
3734                   assign(res, binop(op, mkexpr(arg_m),
3735                                         unop(Iop_64to8, mkexpr(arg_n))));
3736                else
3737                   assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3738             } else {
3739                if (Q)
3740                   assign(tmp, binop(sub_op,
3741                                     binop(Iop_64HLtoV128, mkU64(0), mkU64(0)),
3742                                     mkexpr(arg_n)));
3743                else
3744                   assign(tmp, binop(sub_op, mkU64(0), mkexpr(arg_n)));
3745                if (!Q && (size == 3))
3746                   assign(res, binop(op, mkexpr(arg_m),
3747                                         unop(Iop_64to8, mkexpr(tmp))));
3748                else
3749                   assign(res, binop(op, mkexpr(arg_m), mkexpr(tmp)));
3750             }
3751             DIP("vshl.%c%d %c%u, %c%u, %c%u\n",
3752                 U ? 'u' : 's', 8 << size,
3753                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3754                 nreg);
3755          } else {
3756             /* VQSHL */
3757             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt;
3758             IRTemp tmp, shval, mask, old_shval;
3759             UInt i;
3760             ULong esize;
3761             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
3762             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3763             if (U) {
3764                switch (size) {
3765                   case 0:
3766                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
3767                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
3768                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3769                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3770                      break;
3771                   case 1:
3772                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
3773                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
3774                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3775                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3776                      break;
3777                   case 2:
3778                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
3779                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
3780                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3781                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3782                      break;
3783                   case 3:
3784                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
3785                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
3786                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3787                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3788                      break;
3789                   default:
3790                      vassert(0);
3791                }
3792             } else {
3793                switch (size) {
3794                   case 0:
3795                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
3796                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
3797                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3798                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3799                      break;
3800                   case 1:
3801                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
3802                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
3803                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3804                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3805                      break;
3806                   case 2:
3807                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
3808                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
3809                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3810                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3811                      break;
3812                   case 3:
3813                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
3814                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
3815                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3816                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3817                      break;
3818                   default:
3819                      vassert(0);
3820                }
3821             }
3822             if (Q) {
3823                tmp = newTemp(Ity_V128);
3824                shval = newTemp(Ity_V128);
3825                mask = newTemp(Ity_V128);
3826             } else {
3827                tmp = newTemp(Ity_I64);
3828                shval = newTemp(Ity_I64);
3829                mask = newTemp(Ity_I64);
3830             }
3831             assign(res, binop(op, mkexpr(arg_m), mkexpr(arg_n)));
3832             /* Only least significant byte from second argument is used.
3833                Copy this byte to the whole vector element. */
3834             assign(shval, binop(op_shrn,
3835                                 binop(op_shln,
3836                                        mkexpr(arg_n),
3837                                        mkU8((8 << size) - 8)),
3838                                 mkU8((8 << size) - 8)));
3839             for(i = 0; i < size; i++) {
3840                old_shval = shval;
3841                shval = newTemp(Q ? Ity_V128 : Ity_I64);
3842                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3843                                    mkexpr(old_shval),
3844                                    binop(op_shln,
3845                                          mkexpr(old_shval),
3846                                          mkU8(8 << i))));
3847             }
3848             /* If shift is greater or equal to the element size and
3849                element is non-zero, then QC flag should be set. */
3850             esize = (8 << size) - 1;
3851             esize = (esize <<  8) | esize;
3852             esize = (esize << 16) | esize;
3853             esize = (esize << 32) | esize;
3854             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3855                              binop(cmp_gt, mkexpr(shval),
3856                                            Q ? mkU128(esize) : mkU64(esize)),
3857                              unop(cmp_neq, mkexpr(arg_m))),
3858                        Q ? mkU128(0) : mkU64(0),
3859                        Q, condT);
3860             /* Othervise QC flag should be set if shift value is positive and
3861                result beign rightshifted the same value is not equal to left
3862                argument. */
3863             assign(mask, binop(cmp_gt, mkexpr(shval),
3864                                        Q ? mkU128(0) : mkU64(0)));
3865             if (!Q && size == 3)
3866                assign(tmp, binop(op_rev, mkexpr(res),
3867                                          unop(Iop_64to8, mkexpr(arg_n))));
3868             else
3869                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
3870             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
3871                              mkexpr(tmp), mkexpr(mask)),
3872                        binop(Q ? Iop_AndV128 : Iop_And64,
3873                              mkexpr(arg_m), mkexpr(mask)),
3874                        Q, condT);
3875             DIP("vqshl.%c%d %c%u, %c%u, %c%u\n",
3876                 U ? 'u' : 's', 8 << size,
3877                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
3878                 nreg);
3879          }
3880          break;
3881       case 5:
3882          if (B == 0) {
3883             /* VRSHL */
3884             IROp op, op_shrn, op_shln, cmp_gt, op_add;
3885             IRTemp shval, old_shval, imm_val, round;
3886             UInt i;
3887             ULong imm;
3888             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
3889             imm = 1L;
3890             switch (size) {
3891                case 0: imm = (imm <<  8) | imm; /* fall through */
3892                case 1: imm = (imm << 16) | imm; /* fall through */
3893                case 2: imm = (imm << 32) | imm; /* fall through */
3894                case 3: break;
3895                default: vassert(0);
3896             }
3897             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
3898             round = newTemp(Q ? Ity_V128 : Ity_I64);
3899             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
3900             if (U) {
3901                switch (size) {
3902                   case 0:
3903                      op = Q ? Iop_Shl8x16 : Iop_Shl8x8;
3904                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3905                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3906                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3907                      break;
3908                   case 1:
3909                      op = Q ? Iop_Shl16x8 : Iop_Shl16x4;
3910                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3911                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3912                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3913                      break;
3914                   case 2:
3915                      op = Q ? Iop_Shl32x4 : Iop_Shl32x2;
3916                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3917                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3918                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3919                      break;
3920                   case 3:
3921                      op = Q ? Iop_Shl64x2 : Iop_Shl64;
3922                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
3923                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3924                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3925                      break;
3926                   default:
3927                      vassert(0);
3928                }
3929             } else {
3930                switch (size) {
3931                   case 0:
3932                      op = Q ? Iop_Sal8x16 : Iop_Sal8x8;
3933                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
3934                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
3935                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
3936                      break;
3937                   case 1:
3938                      op = Q ? Iop_Sal16x8 : Iop_Sal16x4;
3939                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
3940                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
3941                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
3942                      break;
3943                   case 2:
3944                      op = Q ? Iop_Sal32x4 : Iop_Sal32x2;
3945                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
3946                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
3947                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
3948                      break;
3949                   case 3:
3950                      op = Q ? Iop_Sal64x2 : Iop_Sal64x1;
3951                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
3952                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
3953                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
3954                      break;
3955                   default:
3956                      vassert(0);
3957                }
3958             }
3959             if (Q) {
3960                shval = newTemp(Ity_V128);
3961             } else {
3962                shval = newTemp(Ity_I64);
3963             }
3964             /* Only least significant byte from second argument is used.
3965                Copy this byte to the whole vector element. */
3966             assign(shval, binop(op_shrn,
3967                                 binop(op_shln,
3968                                        mkexpr(arg_n),
3969                                        mkU8((8 << size) - 8)),
3970                                 mkU8((8 << size) - 8)));
3971             for (i = 0; i < size; i++) {
3972                old_shval = shval;
3973                shval = newTemp(Q ? Ity_V128 : Ity_I64);
3974                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
3975                                    mkexpr(old_shval),
3976                                    binop(op_shln,
3977                                          mkexpr(old_shval),
3978                                          mkU8(8 << i))));
3979             }
3980             /* Compute the result */
3981             if (!Q && size == 3 && U) {
3982                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
3983                                    binop(op,
3984                                          mkexpr(arg_m),
3985                                          unop(Iop_64to8,
3986                                               binop(op_add,
3987                                                     mkexpr(arg_n),
3988                                                     mkexpr(imm_val)))),
3989                                    binop(Q ? Iop_AndV128 : Iop_And64,
3990                                          mkexpr(imm_val),
3991                                          binop(cmp_gt,
3992                                                Q ? mkU128(0) : mkU64(0),
3993                                                mkexpr(arg_n)))));
3994                assign(res, binop(op_add,
3995                                  binop(op,
3996                                        mkexpr(arg_m),
3997                                        unop(Iop_64to8, mkexpr(arg_n))),
3998                                  mkexpr(round)));
3999             } else {
4000                assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4001                                    binop(op,
4002                                          mkexpr(arg_m),
4003                                          binop(op_add,
4004                                                mkexpr(arg_n),
4005                                                mkexpr(imm_val))),
4006                                    binop(Q ? Iop_AndV128 : Iop_And64,
4007                                          mkexpr(imm_val),
4008                                          binop(cmp_gt,
4009                                                Q ? mkU128(0) : mkU64(0),
4010                                                mkexpr(arg_n)))));
4011                assign(res, binop(op_add,
4012                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4013                                  mkexpr(round)));
4014             }
4015             DIP("vrshl.%c%d %c%u, %c%u, %c%u\n",
4016                 U ? 'u' : 's', 8 << size,
4017                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4018                 nreg);
4019          } else {
4020             /* VQRSHL */
4021             IROp op, op_rev, op_shrn, op_shln, cmp_neq, cmp_gt, op_add;
4022             IRTemp tmp, shval, mask, old_shval, imm_val, round;
4023             UInt i;
4024             ULong esize, imm;
4025             cmp_neq = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8;
4026             cmp_gt = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
4027             imm = 1L;
4028             switch (size) {
4029                case 0: imm = (imm <<  8) | imm; /* fall through */
4030                case 1: imm = (imm << 16) | imm; /* fall through */
4031                case 2: imm = (imm << 32) | imm; /* fall through */
4032                case 3: break;
4033                default: vassert(0);
4034             }
4035             imm_val = newTemp(Q ? Ity_V128 : Ity_I64);
4036             round = newTemp(Q ? Ity_V128 : Ity_I64);
4037             assign(imm_val, Q ? mkU128(imm) : mkU64(imm));
4038             if (U) {
4039                switch (size) {
4040                   case 0:
4041                      op = Q ? Iop_QShl8x16 : Iop_QShl8x8;
4042                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4043                      op_rev = Q ? Iop_Shr8x16 : Iop_Shr8x8;
4044                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4045                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4046                      break;
4047                   case 1:
4048                      op = Q ? Iop_QShl16x8 : Iop_QShl16x4;
4049                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4050                      op_rev = Q ? Iop_Shr16x8 : Iop_Shr16x4;
4051                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4052                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4053                      break;
4054                   case 2:
4055                      op = Q ? Iop_QShl32x4 : Iop_QShl32x2;
4056                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4057                      op_rev = Q ? Iop_Shr32x4 : Iop_Shr32x2;
4058                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4059                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4060                      break;
4061                   case 3:
4062                      op = Q ? Iop_QShl64x2 : Iop_QShl64x1;
4063                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
4064                      op_rev = Q ? Iop_Shr64x2 : Iop_Shr64;
4065                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4066                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4067                      break;
4068                   default:
4069                      vassert(0);
4070                }
4071             } else {
4072                switch (size) {
4073                   case 0:
4074                      op = Q ? Iop_QSal8x16 : Iop_QSal8x8;
4075                      op_add = Q ? Iop_Add8x16 : Iop_Add8x8;
4076                      op_rev = Q ? Iop_Sar8x16 : Iop_Sar8x8;
4077                      op_shrn = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
4078                      op_shln = Q ? Iop_ShlN8x16 : Iop_ShlN8x8;
4079                      break;
4080                   case 1:
4081                      op = Q ? Iop_QSal16x8 : Iop_QSal16x4;
4082                      op_add = Q ? Iop_Add16x8 : Iop_Add16x4;
4083                      op_rev = Q ? Iop_Sar16x8 : Iop_Sar16x4;
4084                      op_shrn = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
4085                      op_shln = Q ? Iop_ShlN16x8 : Iop_ShlN16x4;
4086                      break;
4087                   case 2:
4088                      op = Q ? Iop_QSal32x4 : Iop_QSal32x2;
4089                      op_add = Q ? Iop_Add32x4 : Iop_Add32x2;
4090                      op_rev = Q ? Iop_Sar32x4 : Iop_Sar32x2;
4091                      op_shrn = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
4092                      op_shln = Q ? Iop_ShlN32x4 : Iop_ShlN32x2;
4093                      break;
4094                   case 3:
4095                      op = Q ? Iop_QSal64x2 : Iop_QSal64x1;
4096                      op_add = Q ? Iop_Add64x2 : Iop_Add64;
4097                      op_rev = Q ? Iop_Sar64x2 : Iop_Sar64;
4098                      op_shrn = Q ? Iop_ShrN64x2 : Iop_Shr64;
4099                      op_shln = Q ? Iop_ShlN64x2 : Iop_Shl64;
4100                      break;
4101                   default:
4102                      vassert(0);
4103                }
4104             }
4105             if (Q) {
4106                tmp = newTemp(Ity_V128);
4107                shval = newTemp(Ity_V128);
4108                mask = newTemp(Ity_V128);
4109             } else {
4110                tmp = newTemp(Ity_I64);
4111                shval = newTemp(Ity_I64);
4112                mask = newTemp(Ity_I64);
4113             }
4114             /* Only least significant byte from second argument is used.
4115                Copy this byte to the whole vector element. */
4116             assign(shval, binop(op_shrn,
4117                                 binop(op_shln,
4118                                        mkexpr(arg_n),
4119                                        mkU8((8 << size) - 8)),
4120                                 mkU8((8 << size) - 8)));
4121             for (i = 0; i < size; i++) {
4122                old_shval = shval;
4123                shval = newTemp(Q ? Ity_V128 : Ity_I64);
4124                assign(shval, binop(Q ? Iop_OrV128 : Iop_Or64,
4125                                    mkexpr(old_shval),
4126                                    binop(op_shln,
4127                                          mkexpr(old_shval),
4128                                          mkU8(8 << i))));
4129             }
4130             /* Compute the result */
4131             assign(round, binop(Q ? Iop_AndV128 : Iop_And64,
4132                                 binop(op,
4133                                       mkexpr(arg_m),
4134                                       binop(op_add,
4135                                             mkexpr(arg_n),
4136                                             mkexpr(imm_val))),
4137                                 binop(Q ? Iop_AndV128 : Iop_And64,
4138                                       mkexpr(imm_val),
4139                                       binop(cmp_gt,
4140                                             Q ? mkU128(0) : mkU64(0),
4141                                             mkexpr(arg_n)))));
4142             assign(res, binop(op_add,
4143                               binop(op, mkexpr(arg_m), mkexpr(arg_n)),
4144                               mkexpr(round)));
4145             /* If shift is greater or equal to the element size and element is
4146                non-zero, then QC flag should be set. */
4147             esize = (8 << size) - 1;
4148             esize = (esize <<  8) | esize;
4149             esize = (esize << 16) | esize;
4150             esize = (esize << 32) | esize;
4151             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4152                              binop(cmp_gt, mkexpr(shval),
4153                                            Q ? mkU128(esize) : mkU64(esize)),
4154                              unop(cmp_neq, mkexpr(arg_m))),
4155                        Q ? mkU128(0) : mkU64(0),
4156                        Q, condT);
4157             /* Othervise QC flag should be set if shift value is positive and
4158                result beign rightshifted the same value is not equal to left
4159                argument. */
4160             assign(mask, binop(cmp_gt, mkexpr(shval),
4161                                Q ? mkU128(0) : mkU64(0)));
4162             if (!Q && size == 3)
4163                assign(tmp, binop(op_rev, mkexpr(res),
4164                                          unop(Iop_64to8, mkexpr(arg_n))));
4165             else
4166                assign(tmp, binop(op_rev, mkexpr(res), mkexpr(arg_n)));
4167             setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4168                              mkexpr(tmp), mkexpr(mask)),
4169                        binop(Q ? Iop_AndV128 : Iop_And64,
4170                              mkexpr(arg_m), mkexpr(mask)),
4171                        Q, condT);
4172             DIP("vqrshl.%c%d %c%u, %c%u, %c%u\n",
4173                 U ? 'u' : 's', 8 << size,
4174                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, Q ? 'q' : 'd',
4175                 nreg);
4176          }
4177          break;
4178       case 6:
4179          /* VMAX, VMIN  */
4180          if (B == 0) {
4181             /* VMAX */
4182             IROp op;
4183             if (U == 0) {
4184                switch (size) {
4185                   case 0: op = Q ? Iop_Max8Sx16 : Iop_Max8Sx8; break;
4186                   case 1: op = Q ? Iop_Max16Sx8 : Iop_Max16Sx4; break;
4187                   case 2: op = Q ? Iop_Max32Sx4 : Iop_Max32Sx2; break;
4188                   case 3: return False;
4189                   default: vassert(0);
4190                }
4191             } else {
4192                switch (size) {
4193                   case 0: op = Q ? Iop_Max8Ux16 : Iop_Max8Ux8; break;
4194                   case 1: op = Q ? Iop_Max16Ux8 : Iop_Max16Ux4; break;
4195                   case 2: op = Q ? Iop_Max32Ux4 : Iop_Max32Ux2; break;
4196                   case 3: return False;
4197                   default: vassert(0);
4198                }
4199             }
4200             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4201             DIP("vmax.%c%d %c%u, %c%u, %c%u\n",
4202                 U ? 'u' : 's', 8 << size,
4203                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4204                 mreg);
4205          } else {
4206             /* VMIN */
4207             IROp op;
4208             if (U == 0) {
4209                switch (size) {
4210                   case 0: op = Q ? Iop_Min8Sx16 : Iop_Min8Sx8; break;
4211                   case 1: op = Q ? Iop_Min16Sx8 : Iop_Min16Sx4; break;
4212                   case 2: op = Q ? Iop_Min32Sx4 : Iop_Min32Sx2; break;
4213                   case 3: return False;
4214                   default: vassert(0);
4215                }
4216             } else {
4217                switch (size) {
4218                   case 0: op = Q ? Iop_Min8Ux16 : Iop_Min8Ux8; break;
4219                   case 1: op = Q ? Iop_Min16Ux8 : Iop_Min16Ux4; break;
4220                   case 2: op = Q ? Iop_Min32Ux4 : Iop_Min32Ux2; break;
4221                   case 3: return False;
4222                   default: vassert(0);
4223                }
4224             }
4225             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4226             DIP("vmin.%c%d %c%u, %c%u, %c%u\n",
4227                 U ? 'u' : 's', 8 << size,
4228                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4229                 mreg);
4230          }
4231          break;
4232       case 7:
4233          if (B == 0) {
4234             /* VABD */
4235             IROp op_cmp, op_sub;
4236             IRTemp cond;
4237             if ((theInstr >> 23) & 1) {
4238                vpanic("VABDL should not be in dis_neon_data_3same\n");
4239             }
4240             if (Q) {
4241                switch (size) {
4242                   case 0:
4243                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4244                      op_sub = Iop_Sub8x16;
4245                      break;
4246                   case 1:
4247                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4248                      op_sub = Iop_Sub16x8;
4249                      break;
4250                   case 2:
4251                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4252                      op_sub = Iop_Sub32x4;
4253                      break;
4254                   case 3:
4255                      return False;
4256                   default:
4257                      vassert(0);
4258                }
4259             } else {
4260                switch (size) {
4261                   case 0:
4262                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4263                      op_sub = Iop_Sub8x8;
4264                      break;
4265                   case 1:
4266                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4267                      op_sub = Iop_Sub16x4;
4268                      break;
4269                   case 2:
4270                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4271                      op_sub = Iop_Sub32x2;
4272                      break;
4273                   case 3:
4274                      return False;
4275                   default:
4276                      vassert(0);
4277                }
4278             }
4279             if (Q) {
4280                cond = newTemp(Ity_V128);
4281             } else {
4282                cond = newTemp(Ity_I64);
4283             }
4284             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4285             assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
4286                               binop(Q ? Iop_AndV128 : Iop_And64,
4287                                     binop(op_sub, mkexpr(arg_n),
4288                                                   mkexpr(arg_m)),
4289                                     mkexpr(cond)),
4290                               binop(Q ? Iop_AndV128 : Iop_And64,
4291                                     binop(op_sub, mkexpr(arg_m),
4292                                                   mkexpr(arg_n)),
4293                                     unop(Q ? Iop_NotV128 : Iop_Not64,
4294                                          mkexpr(cond)))));
4295             DIP("vabd.%c%d %c%u, %c%u, %c%u\n",
4296                 U ? 'u' : 's', 8 << size,
4297                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4298                 mreg);
4299          } else {
4300             /* VABA */
4301             IROp op_cmp, op_sub, op_add;
4302             IRTemp cond, acc, tmp;
4303             if ((theInstr >> 23) & 1) {
4304                vpanic("VABAL should not be in dis_neon_data_3same");
4305             }
4306             if (Q) {
4307                switch (size) {
4308                   case 0:
4309                      op_cmp = U ? Iop_CmpGT8Ux16 : Iop_CmpGT8Sx16;
4310                      op_sub = Iop_Sub8x16;
4311                      op_add = Iop_Add8x16;
4312                      break;
4313                   case 1:
4314                      op_cmp = U ? Iop_CmpGT16Ux8 : Iop_CmpGT16Sx8;
4315                      op_sub = Iop_Sub16x8;
4316                      op_add = Iop_Add16x8;
4317                      break;
4318                   case 2:
4319                      op_cmp = U ? Iop_CmpGT32Ux4 : Iop_CmpGT32Sx4;
4320                      op_sub = Iop_Sub32x4;
4321                      op_add = Iop_Add32x4;
4322                      break;
4323                   case 3:
4324                      return False;
4325                   default:
4326                      vassert(0);
4327                }
4328             } else {
4329                switch (size) {
4330                   case 0:
4331                      op_cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
4332                      op_sub = Iop_Sub8x8;
4333                      op_add = Iop_Add8x8;
4334                      break;
4335                   case 1:
4336                      op_cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
4337                      op_sub = Iop_Sub16x4;
4338                      op_add = Iop_Add16x4;
4339                      break;
4340                   case 2:
4341                      op_cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
4342                      op_sub = Iop_Sub32x2;
4343                      op_add = Iop_Add32x2;
4344                      break;
4345                   case 3:
4346                      return False;
4347                   default:
4348                      vassert(0);
4349                }
4350             }
4351             if (Q) {
4352                cond = newTemp(Ity_V128);
4353                acc = newTemp(Ity_V128);
4354                tmp = newTemp(Ity_V128);
4355                assign(acc, getQReg(dreg));
4356             } else {
4357                cond = newTemp(Ity_I64);
4358                acc = newTemp(Ity_I64);
4359                tmp = newTemp(Ity_I64);
4360                assign(acc, getDRegI64(dreg));
4361             }
4362             assign(cond, binop(op_cmp, mkexpr(arg_n), mkexpr(arg_m)));
4363             assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
4364                               binop(Q ? Iop_AndV128 : Iop_And64,
4365                                     binop(op_sub, mkexpr(arg_n),
4366                                                   mkexpr(arg_m)),
4367                                     mkexpr(cond)),
4368                               binop(Q ? Iop_AndV128 : Iop_And64,
4369                                     binop(op_sub, mkexpr(arg_m),
4370                                                   mkexpr(arg_n)),
4371                                     unop(Q ? Iop_NotV128 : Iop_Not64,
4372                                          mkexpr(cond)))));
4373             assign(res, binop(op_add, mkexpr(acc), mkexpr(tmp)));
4374             DIP("vaba.%c%d %c%u, %c%u, %c%u\n",
4375                 U ? 'u' : 's', 8 << size,
4376                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4377                 mreg);
4378          }
4379          break;
4380       case 8:
4381          if (B == 0) {
4382             IROp op;
4383             if (U == 0) {
4384                /* VADD  */
4385                switch (size) {
4386                   case 0: op = Q ? Iop_Add8x16 : Iop_Add8x8; break;
4387                   case 1: op = Q ? Iop_Add16x8 : Iop_Add16x4; break;
4388                   case 2: op = Q ? Iop_Add32x4 : Iop_Add32x2; break;
4389                   case 3: op = Q ? Iop_Add64x2 : Iop_Add64; break;
4390                   default: vassert(0);
4391                }
4392                DIP("vadd.i%d %c%u, %c%u, %c%u\n",
4393                    8 << size, Q ? 'q' : 'd',
4394                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4395             } else {
4396                /* VSUB  */
4397                switch (size) {
4398                   case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
4399                   case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
4400                   case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
4401                   case 3: op = Q ? Iop_Sub64x2 : Iop_Sub64; break;
4402                   default: vassert(0);
4403                }
4404                DIP("vsub.i%d %c%u, %c%u, %c%u\n",
4405                    8 << size, Q ? 'q' : 'd',
4406                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4407             }
4408             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4409          } else {
4410             IROp op;
4411             switch (size) {
4412                case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
4413                case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
4414                case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
4415                case 3: op = Q ? Iop_CmpNEZ64x2 : Iop_CmpwNEZ64; break;
4416                default: vassert(0);
4417             }
4418             if (U == 0) {
4419                /* VTST  */
4420                assign(res, unop(op, binop(Q ? Iop_AndV128 : Iop_And64,
4421                                           mkexpr(arg_n),
4422                                           mkexpr(arg_m))));
4423                DIP("vtst.%d %c%u, %c%u, %c%u\n",
4424                    8 << size, Q ? 'q' : 'd',
4425                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4426             } else {
4427                /* VCEQ  */
4428                assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
4429                                 unop(op,
4430                                      binop(Q ? Iop_XorV128 : Iop_Xor64,
4431                                            mkexpr(arg_n),
4432                                            mkexpr(arg_m)))));
4433                DIP("vceq.i%d %c%u, %c%u, %c%u\n",
4434                    8 << size, Q ? 'q' : 'd',
4435                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4436             }
4437          }
4438          break;
4439       case 9:
4440          if (B == 0) {
4441             /* VMLA, VMLS (integer) */
4442             IROp op, op2;
4443             UInt P = (theInstr >> 24) & 1;
4444             if (P) {
4445                switch (size) {
4446                   case 0:
4447                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4448                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
4449                      break;
4450                   case 1:
4451                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4452                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
4453                      break;
4454                   case 2:
4455                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4456                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
4457                      break;
4458                   case 3:
4459                      return False;
4460                   default:
4461                      vassert(0);
4462                }
4463             } else {
4464                switch (size) {
4465                   case 0:
4466                      op = Q ? Iop_Mul8x16 : Iop_Mul8x8;
4467                      op2 = Q ? Iop_Add8x16 : Iop_Add8x8;
4468                      break;
4469                   case 1:
4470                      op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
4471                      op2 = Q ? Iop_Add16x8 : Iop_Add16x4;
4472                      break;
4473                   case 2:
4474                      op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
4475                      op2 = Q ? Iop_Add32x4 : Iop_Add32x2;
4476                      break;
4477                   case 3:
4478                      return False;
4479                   default:
4480                      vassert(0);
4481                }
4482             }
4483             assign(res, binop(op2,
4484                               Q ? getQReg(dreg) : getDRegI64(dreg),
4485                               binop(op, mkexpr(arg_n), mkexpr(arg_m))));
4486             DIP("vml%c.i%d %c%u, %c%u, %c%u\n",
4487                 P ? 's' : 'a', 8 << size,
4488                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4489                 mreg);
4490          } else {
4491             /* VMUL */
4492             IROp op;
4493             UInt P = (theInstr >> 24) & 1;
4494             if (P) {
4495                switch (size) {
4496                   case 0:
4497                      op = Q ? Iop_PolynomialMul8x16 : Iop_PolynomialMul8x8;
4498                      break;
4499                   case 1: case 2: case 3: return False;
4500                   default: vassert(0);
4501                }
4502             } else {
4503                switch (size) {
4504                   case 0: op = Q ? Iop_Mul8x16 : Iop_Mul8x8; break;
4505                   case 1: op = Q ? Iop_Mul16x8 : Iop_Mul16x4; break;
4506                   case 2: op = Q ? Iop_Mul32x4 : Iop_Mul32x2; break;
4507                   case 3: return False;
4508                   default: vassert(0);
4509                }
4510             }
4511             assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4512             DIP("vmul.%c%d %c%u, %c%u, %c%u\n",
4513                 P ? 'p' : 'i', 8 << size,
4514                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd',
4515                 mreg);
4516          }
4517          break;
4518       case 10: {
4519          /* VPMAX, VPMIN  */
4520          UInt P = (theInstr >> 4) & 1;
4521          IROp op;
4522          if (Q)
4523             return False;
4524          if (P) {
4525             switch (size) {
4526                case 0: op = U ? Iop_PwMin8Ux8  : Iop_PwMin8Sx8; break;
4527                case 1: op = U ? Iop_PwMin16Ux4 : Iop_PwMin16Sx4; break;
4528                case 2: op = U ? Iop_PwMin32Ux2 : Iop_PwMin32Sx2; break;
4529                case 3: return False;
4530                default: vassert(0);
4531             }
4532          } else {
4533             switch (size) {
4534                case 0: op = U ? Iop_PwMax8Ux8  : Iop_PwMax8Sx8; break;
4535                case 1: op = U ? Iop_PwMax16Ux4 : Iop_PwMax16Sx4; break;
4536                case 2: op = U ? Iop_PwMax32Ux2 : Iop_PwMax32Sx2; break;
4537                case 3: return False;
4538                default: vassert(0);
4539             }
4540          }
4541          assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4542          DIP("vp%s.%c%d %c%u, %c%u, %c%u\n",
4543              P ? "min" : "max", U ? 'u' : 's',
4544              8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4545              Q ? 'q' : 'd', mreg);
4546          break;
4547       }
4548       case 11:
4549          if (B == 0) {
4550             if (U == 0) {
4551                /* VQDMULH  */
4552                IROp op ,op2;
4553                ULong imm;
4554                switch (size) {
4555                   case 0: case 3:
4556                      return False;
4557                   case 1:
4558                      op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
4559                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4560                      imm = 1LL << 15;
4561                      imm = (imm << 16) | imm;
4562                      imm = (imm << 32) | imm;
4563                      break;
4564                   case 2:
4565                      op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
4566                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4567                      imm = 1LL << 31;
4568                      imm = (imm << 32) | imm;
4569                      break;
4570                   default:
4571                      vassert(0);
4572                }
4573                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4574                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4575                                 binop(op2, mkexpr(arg_n),
4576                                            Q ? mkU128(imm) : mkU64(imm)),
4577                                 binop(op2, mkexpr(arg_m),
4578                                            Q ? mkU128(imm) : mkU64(imm))),
4579                           Q ? mkU128(0) : mkU64(0),
4580                           Q, condT);
4581                DIP("vqdmulh.s%d %c%u, %c%u, %c%u\n",
4582                    8 << size, Q ? 'q' : 'd',
4583                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4584             } else {
4585                /* VQRDMULH */
4586                IROp op ,op2;
4587                ULong imm;
4588                switch(size) {
4589                   case 0: case 3:
4590                      return False;
4591                   case 1:
4592                      imm = 1LL << 15;
4593                      imm = (imm << 16) | imm;
4594                      imm = (imm << 32) | imm;
4595                      op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
4596                      op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
4597                      break;
4598                   case 2:
4599                      imm = 1LL << 31;
4600                      imm = (imm << 32) | imm;
4601                      op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
4602                      op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
4603                      break;
4604                   default:
4605                      vassert(0);
4606                }
4607                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4608                setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
4609                                 binop(op2, mkexpr(arg_n),
4610                                            Q ? mkU128(imm) : mkU64(imm)),
4611                                 binop(op2, mkexpr(arg_m),
4612                                            Q ? mkU128(imm) : mkU64(imm))),
4613                           Q ? mkU128(0) : mkU64(0),
4614                           Q, condT);
4615                DIP("vqrdmulh.s%d %c%u, %c%u, %c%u\n",
4616                    8 << size, Q ? 'q' : 'd',
4617                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4618             }
4619          } else {
4620             if (U == 0) {
4621                /* VPADD */
4622                IROp op;
4623                if (Q)
4624                   return False;
4625                switch (size) {
4626                   case 0: op = Q ? Iop_PwAdd8x16 : Iop_PwAdd8x8;  break;
4627                   case 1: op = Q ? Iop_PwAdd16x8 : Iop_PwAdd16x4; break;
4628                   case 2: op = Q ? Iop_PwAdd32x4 : Iop_PwAdd32x2; break;
4629                   case 3: return False;
4630                   default: vassert(0);
4631                }
4632                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4633                DIP("vpadd.i%d %c%u, %c%u, %c%u\n",
4634                    8 << size, Q ? 'q' : 'd',
4635                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4636             } else {
4637                return False;
4638             }
4639          }
4640          break;
4641       case 12: {
4642          return False;
4643       }
4644       /* Starting from here these are FP SIMD cases */
4645       case 13:
4646          if (B == 0) {
4647             IROp op;
4648             if (U == 0) {
4649                if ((C >> 1) == 0) {
4650                   /* VADD  */
4651                   op = Q ? Iop_Add32Fx4 : Iop_Add32Fx2 ;
4652                   DIP("vadd.f32 %c%u, %c%u, %c%u\n",
4653                       Q ? 'q' : 'd', dreg,
4654                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4655                } else {
4656                   /* VSUB  */
4657                   op = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2 ;
4658                   DIP("vsub.f32 %c%u, %c%u, %c%u\n",
4659                       Q ? 'q' : 'd', dreg,
4660                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4661                }
4662             } else {
4663                if ((C >> 1) == 0) {
4664                   /* VPADD */
4665                   if (Q)
4666                      return False;
4667                   op = Iop_PwAdd32Fx2;
4668                   DIP("vpadd.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4669                } else {
4670                   /* VABD  */
4671                   if (Q) {
4672                      assign(res, unop(Iop_Abs32Fx4,
4673                                       triop(Iop_Sub32Fx4,
4674                                             get_FAKE_roundingmode(),
4675                                             mkexpr(arg_n),
4676                                             mkexpr(arg_m))));
4677                   } else {
4678                      assign(res, unop(Iop_Abs32Fx2,
4679                                       binop(Iop_Sub32Fx2,
4680                                             mkexpr(arg_n),
4681                                             mkexpr(arg_m))));
4682                   }
4683                   DIP("vabd.f32 %c%u, %c%u, %c%u\n",
4684                       Q ? 'q' : 'd', dreg,
4685                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4686                   break;
4687                }
4688             }
4689             assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4690          } else {
4691             if (U == 0) {
4692                /* VMLA, VMLS  */
4693                IROp op, op2;
4694                UInt P = (theInstr >> 21) & 1;
4695                if (P) {
4696                   switch (size & 1) {
4697                      case 0:
4698                         op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4699                         op2 = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
4700                         break;
4701                      case 1: return False;
4702                      default: vassert(0);
4703                   }
4704                } else {
4705                   switch (size & 1) {
4706                      case 0:
4707                         op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
4708                         op2 = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
4709                         break;
4710                      case 1: return False;
4711                      default: vassert(0);
4712                   }
4713                }
4714                assign(res, binop_w_fake_RM(
4715                               op2,
4716                               Q ? getQReg(dreg) : getDRegI64(dreg),
4717                               binop_w_fake_RM(op, mkexpr(arg_n),
4718                                                   mkexpr(arg_m))));
4719
4720                DIP("vml%c.f32 %c%u, %c%u, %c%u\n",
4721                    P ? 's' : 'a', Q ? 'q' : 'd',
4722                    dreg, Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4723             } else {
4724                /* VMUL  */
4725                IROp op;
4726                if ((C >> 1) != 0)
4727                   return False;
4728                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2 ;
4729                assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
4730                DIP("vmul.f32 %c%u, %c%u, %c%u\n",
4731                    Q ? 'q' : 'd', dreg,
4732                    Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4733             }
4734          }
4735          break;
4736       case 14:
4737          if (B == 0) {
4738             if (U == 0) {
4739                if ((C >> 1) == 0) {
4740                   /* VCEQ  */
4741                   IROp op;
4742                   if ((theInstr >> 20) & 1)
4743                      return False;
4744                   op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2;
4745                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4746                   DIP("vceq.f32 %c%u, %c%u, %c%u\n",
4747                       Q ? 'q' : 'd', dreg,
4748                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4749                } else {
4750                   return False;
4751                }
4752             } else {
4753                if ((C >> 1) == 0) {
4754                   /* VCGE  */
4755                   IROp op;
4756                   if ((theInstr >> 20) & 1)
4757                      return False;
4758                   op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4759                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4760                   DIP("vcge.f32 %c%u, %c%u, %c%u\n",
4761                       Q ? 'q' : 'd', dreg,
4762                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4763                } else {
4764                   /* VCGT  */
4765                   IROp op;
4766                   if ((theInstr >> 20) & 1)
4767                      return False;
4768                   op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4769                   assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4770                   DIP("vcgt.f32 %c%u, %c%u, %c%u\n",
4771                       Q ? 'q' : 'd', dreg,
4772                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4773                }
4774             }
4775          } else {
4776             if (U == 1) {
4777                /* VACGE, VACGT */
4778                UInt op_bit = (theInstr >> 21) & 1;
4779                IROp op, op2;
4780                op2 = Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2;
4781                if (op_bit) {
4782                   op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2;
4783                   assign(res, binop(op,
4784                                     unop(op2, mkexpr(arg_n)),
4785                                     unop(op2, mkexpr(arg_m))));
4786                } else {
4787                   op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2;
4788                   assign(res, binop(op,
4789                                     unop(op2, mkexpr(arg_n)),
4790                                     unop(op2, mkexpr(arg_m))));
4791                }
4792                DIP("vacg%c.f32 %c%u, %c%u, %c%u\n", op_bit ? 't' : 'e',
4793                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg,
4794                    Q ? 'q' : 'd', mreg);
4795             } else {
4796                return False;
4797             }
4798          }
4799          break;
4800       case 15:
4801          if (B == 0) {
4802             if (U == 0) {
4803                /* VMAX, VMIN  */
4804                IROp op;
4805                if ((theInstr >> 20) & 1)
4806                   return False;
4807                if ((theInstr >> 21) & 1) {
4808                   op = Q ? Iop_Min32Fx4 : Iop_Min32Fx2;
4809                   DIP("vmin.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4810                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4811                } else {
4812                   op = Q ? Iop_Max32Fx4 : Iop_Max32Fx2;
4813                   DIP("vmax.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4814                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4815                }
4816                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4817             } else {
4818                /* VPMAX, VPMIN   */
4819                IROp op;
4820                if (Q)
4821                   return False;
4822                if ((theInstr >> 20) & 1)
4823                   return False;
4824                if ((theInstr >> 21) & 1) {
4825                   op = Iop_PwMin32Fx2;
4826                   DIP("vpmin.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4827                } else {
4828                   op = Iop_PwMax32Fx2;
4829                   DIP("vpmax.f32 d%u, d%u, d%u\n", dreg, nreg, mreg);
4830                }
4831                assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
4832             }
4833          } else {
4834             if (U == 0) {
4835                if ((C >> 1) == 0) {
4836                   /* VRECPS */
4837                   if ((theInstr >> 20) & 1)
4838                      return False;
4839                   assign(res, binop(Q ? Iop_RecipStep32Fx4
4840                                       : Iop_RecipStep32Fx2,
4841                                     mkexpr(arg_n),
4842                                     mkexpr(arg_m)));
4843                   DIP("vrecps.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4844                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4845                } else {
4846                   /* VRSQRTS  */
4847                   if ((theInstr >> 20) & 1)
4848                      return False;
4849                   assign(res, binop(Q ? Iop_RSqrtStep32Fx4
4850                                       : Iop_RSqrtStep32Fx2,
4851                                     mkexpr(arg_n),
4852                                     mkexpr(arg_m)));
4853                   DIP("vrsqrts.f32 %c%u, %c%u, %c%u\n", Q ? 'q' : 'd', dreg,
4854                       Q ? 'q' : 'd', nreg, Q ? 'q' : 'd', mreg);
4855                }
4856             } else {
4857                return False;
4858             }
4859          }
4860          break;
4861       default:
4862          /*NOTREACHED*/
4863          vassert(0);
4864    }
4865
4866    if (Q) {
4867       putQReg(dreg, mkexpr(res), condT);
4868    } else {
4869       putDRegI64(dreg, mkexpr(res), condT);
4870    }
4871
4872    return True;
4873 }
4874
4875 /* A7.4.2 Three registers of different length */
4876 static
4877 Bool dis_neon_data_3diff ( UInt theInstr, IRTemp condT )
4878 {
4879    /* In paths where this returns False, indicating a non-decodable
4880       instruction, there may still be some IR assignments to temporaries
4881       generated.  This is inconvenient but harmless, and the post-front-end
4882       IR optimisation pass will just remove them anyway.  So there's no
4883       effort made here to tidy it up.
4884    */
4885    UInt A = (theInstr >> 8) & 0xf;
4886    UInt B = (theInstr >> 20) & 3;
4887    UInt U = (theInstr >> 24) & 1;
4888    UInt P = (theInstr >> 9) & 1;
4889    UInt mreg = get_neon_m_regno(theInstr);
4890    UInt nreg = get_neon_n_regno(theInstr);
4891    UInt dreg = get_neon_d_regno(theInstr);
4892    UInt size = B;
4893    ULong imm;
4894    IRTemp res, arg_m, arg_n, cond, tmp;
4895    IROp cvt, cvt2, cmp, op, op2, sh, add;
4896    switch (A) {
4897       case 0: case 1: case 2: case 3:
4898          /* VADDL, VADDW, VSUBL, VSUBW */
4899          if (dreg & 1)
4900             return False;
4901          dreg >>= 1;
4902          size = B;
4903          switch (size) {
4904             case 0:
4905                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
4906                op = (A & 2) ? Iop_Sub16x8 : Iop_Add16x8;
4907                break;
4908             case 1:
4909                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
4910                op = (A & 2) ? Iop_Sub32x4 : Iop_Add32x4;
4911                break;
4912             case 2:
4913                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
4914                op = (A & 2) ? Iop_Sub64x2 : Iop_Add64x2;
4915                break;
4916             case 3:
4917                return False;
4918             default:
4919                vassert(0);
4920          }
4921          arg_n = newTemp(Ity_V128);
4922          arg_m = newTemp(Ity_V128);
4923          if (A & 1) {
4924             if (nreg & 1)
4925                return False;
4926             nreg >>= 1;
4927             assign(arg_n, getQReg(nreg));
4928          } else {
4929             assign(arg_n, unop(cvt, getDRegI64(nreg)));
4930          }
4931          assign(arg_m, unop(cvt, getDRegI64(mreg)));
4932          putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
4933                        condT);
4934          DIP("v%s%c.%c%d q%u, %c%u, d%u\n", (A & 2) ? "sub" : "add",
4935              (A & 1) ? 'w' : 'l', U ? 'u' : 's', 8 << size, dreg,
4936              (A & 1) ? 'q' : 'd', nreg, mreg);
4937          return True;
4938       case 4:
4939          /* VADDHN, VRADDHN */
4940          if (mreg & 1)
4941             return False;
4942          mreg >>= 1;
4943          if (nreg & 1)
4944             return False;
4945          nreg >>= 1;
4946          size = B;
4947          switch (size) {
4948             case 0:
4949                op = Iop_Add16x8;
4950                cvt = Iop_NarrowUn16to8x8;
4951                sh = Iop_ShrN16x8;
4952                imm = 1U << 7;
4953                imm = (imm << 16) | imm;
4954                imm = (imm << 32) | imm;
4955                break;
4956             case 1:
4957                op = Iop_Add32x4;
4958                cvt = Iop_NarrowUn32to16x4;
4959                sh = Iop_ShrN32x4;
4960                imm = 1U << 15;
4961                imm = (imm << 32) | imm;
4962                break;
4963             case 2:
4964                op = Iop_Add64x2;
4965                cvt = Iop_NarrowUn64to32x2;
4966                sh = Iop_ShrN64x2;
4967                imm = 1U << 31;
4968                break;
4969             case 3:
4970                return False;
4971             default:
4972                vassert(0);
4973          }
4974          tmp = newTemp(Ity_V128);
4975          res = newTemp(Ity_V128);
4976          assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
4977          if (U) {
4978             /* VRADDHN */
4979             assign(res, binop(op, mkexpr(tmp),
4980                      binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
4981          } else {
4982             assign(res, mkexpr(tmp));
4983          }
4984          putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
4985                     condT);
4986          DIP("v%saddhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
4987              nreg, mreg);
4988          return True;
4989       case 5:
4990          /* VABAL */
4991          if (!((theInstr >> 23) & 1)) {
4992             vpanic("VABA should not be in dis_neon_data_3diff\n");
4993          }
4994          if (dreg & 1)
4995             return False;
4996          dreg >>= 1;
4997          switch (size) {
4998             case 0:
4999                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5000                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5001                cvt2 = Iop_Widen8Sto16x8;
5002                op = Iop_Sub16x8;
5003                op2 = Iop_Add16x8;
5004                break;
5005             case 1:
5006                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5007                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5008                cvt2 = Iop_Widen16Sto32x4;
5009                op = Iop_Sub32x4;
5010                op2 = Iop_Add32x4;
5011                break;
5012             case 2:
5013                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5014                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5015                cvt2 = Iop_Widen32Sto64x2;
5016                op = Iop_Sub64x2;
5017                op2 = Iop_Add64x2;
5018                break;
5019             case 3:
5020                return False;
5021             default:
5022                vassert(0);
5023          }
5024          arg_n = newTemp(Ity_V128);
5025          arg_m = newTemp(Ity_V128);
5026          cond = newTemp(Ity_V128);
5027          res = newTemp(Ity_V128);
5028          assign(arg_n, unop(cvt, getDRegI64(nreg)));
5029          assign(arg_m, unop(cvt, getDRegI64(mreg)));
5030          assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5031                                             getDRegI64(mreg))));
5032          assign(res, binop(op2,
5033                            binop(Iop_OrV128,
5034                                  binop(Iop_AndV128,
5035                                        binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5036                                        mkexpr(cond)),
5037                                  binop(Iop_AndV128,
5038                                        binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5039                                        unop(Iop_NotV128, mkexpr(cond)))),
5040                            getQReg(dreg)));
5041          putQReg(dreg, mkexpr(res), condT);
5042          DIP("vabal.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5043              nreg, mreg);
5044          return True;
5045       case 6:
5046          /* VSUBHN, VRSUBHN */
5047          if (mreg & 1)
5048             return False;
5049          mreg >>= 1;
5050          if (nreg & 1)
5051             return False;
5052          nreg >>= 1;
5053          size = B;
5054          switch (size) {
5055             case 0:
5056                op = Iop_Sub16x8;
5057                op2 = Iop_Add16x8;
5058                cvt = Iop_NarrowUn16to8x8;
5059                sh = Iop_ShrN16x8;
5060                imm = 1U << 7;
5061                imm = (imm << 16) | imm;
5062                imm = (imm << 32) | imm;
5063                break;
5064             case 1:
5065                op = Iop_Sub32x4;
5066                op2 = Iop_Add32x4;
5067                cvt = Iop_NarrowUn32to16x4;
5068                sh = Iop_ShrN32x4;
5069                imm = 1U << 15;
5070                imm = (imm << 32) | imm;
5071                break;
5072             case 2:
5073                op = Iop_Sub64x2;
5074                op2 = Iop_Add64x2;
5075                cvt = Iop_NarrowUn64to32x2;
5076                sh = Iop_ShrN64x2;
5077                imm = 1U << 31;
5078                break;
5079             case 3:
5080                return False;
5081             default:
5082                vassert(0);
5083          }
5084          tmp = newTemp(Ity_V128);
5085          res = newTemp(Ity_V128);
5086          assign(tmp, binop(op, getQReg(nreg), getQReg(mreg)));
5087          if (U) {
5088             /* VRSUBHN */
5089             assign(res, binop(op2, mkexpr(tmp),
5090                      binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm))));
5091          } else {
5092             assign(res, mkexpr(tmp));
5093          }
5094          putDRegI64(dreg, unop(cvt, binop(sh, mkexpr(res), mkU8(8 << size))),
5095                     condT);
5096          DIP("v%ssubhn.i%d d%u, q%u, q%u\n", U ? "r" : "", 16 << size, dreg,
5097              nreg, mreg);
5098          return True;
5099       case 7:
5100          /* VABDL */
5101          if (!((theInstr >> 23) & 1)) {
5102             vpanic("VABL should not be in dis_neon_data_3diff\n");
5103          }
5104          if (dreg & 1)
5105             return False;
5106          dreg >>= 1;
5107          switch (size) {
5108             case 0:
5109                cmp = U ? Iop_CmpGT8Ux8 : Iop_CmpGT8Sx8;
5110                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
5111                cvt2 = Iop_Widen8Sto16x8;
5112                op = Iop_Sub16x8;
5113                break;
5114             case 1:
5115                cmp = U ? Iop_CmpGT16Ux4 : Iop_CmpGT16Sx4;
5116                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
5117                cvt2 = Iop_Widen16Sto32x4;
5118                op = Iop_Sub32x4;
5119                break;
5120             case 2:
5121                cmp = U ? Iop_CmpGT32Ux2 : Iop_CmpGT32Sx2;
5122                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
5123                cvt2 = Iop_Widen32Sto64x2;
5124                op = Iop_Sub64x2;
5125                break;
5126             case 3:
5127                return False;
5128             default:
5129                vassert(0);
5130          }
5131          arg_n = newTemp(Ity_V128);
5132          arg_m = newTemp(Ity_V128);
5133          cond = newTemp(Ity_V128);
5134          res = newTemp(Ity_V128);
5135          assign(arg_n, unop(cvt, getDRegI64(nreg)));
5136          assign(arg_m, unop(cvt, getDRegI64(mreg)));
5137          assign(cond, unop(cvt2, binop(cmp, getDRegI64(nreg),
5138                                             getDRegI64(mreg))));
5139          assign(res, binop(Iop_OrV128,
5140                            binop(Iop_AndV128,
5141                                  binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5142                                  mkexpr(cond)),
5143                            binop(Iop_AndV128,
5144                                  binop(op, mkexpr(arg_m), mkexpr(arg_n)),
5145                                  unop(Iop_NotV128, mkexpr(cond)))));
5146          putQReg(dreg, mkexpr(res), condT);
5147          DIP("vabdl.%c%d q%u, d%u, d%u\n", U ? 'u' : 's', 8 << size, dreg,
5148              nreg, mreg);
5149          return True;
5150       case 8:
5151       case 10:
5152          /* VMLAL, VMLSL (integer) */
5153          if (dreg & 1)
5154             return False;
5155          dreg >>= 1;
5156          size = B;
5157          switch (size) {
5158             case 0:
5159                op = U ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5160                op2 = P ? Iop_Sub16x8 : Iop_Add16x8;
5161                break;
5162             case 1:
5163                op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5164                op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5165                break;
5166             case 2:
5167                op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5168                op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5169                break;
5170             case 3:
5171                return False;
5172             default:
5173                vassert(0);
5174          }
5175          res = newTemp(Ity_V128);
5176          assign(res, binop(op, getDRegI64(nreg),getDRegI64(mreg)));
5177          putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5178          DIP("vml%cl.%c%d q%u, d%u, d%u\n", P ? 's' : 'a', U ? 'u' : 's',
5179              8 << size, dreg, nreg, mreg);
5180          return True;
5181       case 9:
5182       case 11:
5183          /* VQDMLAL, VQDMLSL */
5184          if (U)
5185             return False;
5186          if (dreg & 1)
5187             return False;
5188          dreg >>= 1;
5189          size = B;
5190          switch (size) {
5191             case 0: case 3:
5192                return False;
5193             case 1:
5194                op = Iop_QDMull16Sx4;
5195                cmp = Iop_CmpEQ16x4;
5196                add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5197                op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5198                imm = 1LL << 15;
5199                imm = (imm << 16) | imm;
5200                imm = (imm << 32) | imm;
5201                break;
5202             case 2:
5203                op = Iop_QDMull32Sx2;
5204                cmp = Iop_CmpEQ32x2;
5205                add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5206                op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5207                imm = 1LL << 31;
5208                imm = (imm << 32) | imm;
5209                break;
5210             default:
5211                vassert(0);
5212          }
5213          res = newTemp(Ity_V128);
5214          tmp = newTemp(Ity_V128);
5215          assign(res, binop(op, getDRegI64(nreg), getDRegI64(mreg)));
5216          assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5217          setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5218                     True, condT);
5219          setFlag_QC(binop(Iop_And64,
5220                           binop(cmp, getDRegI64(nreg), mkU64(imm)),
5221                           binop(cmp, getDRegI64(mreg), mkU64(imm))),
5222                     mkU64(0),
5223                     False, condT);
5224          putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5225          DIP("vqdml%cl.s%d q%u, d%u, d%u\n", P ? 's' : 'a', 8 << size, dreg,
5226              nreg, mreg);
5227          return True;
5228       case 12:
5229       case 14:
5230          /* VMULL (integer or polynomial) */
5231          if (dreg & 1)
5232             return False;
5233          dreg >>= 1;
5234          size = B;
5235          switch (size) {
5236             case 0:
5237                op = (U) ? Iop_Mull8Ux8 : Iop_Mull8Sx8;
5238                if (P)
5239                   op = Iop_PolynomialMull8x8;
5240                break;
5241             case 1:
5242                if (P) return False;
5243                op = (U) ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5244                break;
5245             case 2:
5246                if (P) return False;
5247                op = (U) ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5248                break;
5249             case 3:
5250                return False;
5251             default:
5252                vassert(0);
5253          }
5254          putQReg(dreg, binop(op, getDRegI64(nreg),
5255                                  getDRegI64(mreg)), condT);
5256          DIP("vmull.%c%d q%u, d%u, d%u\n", P ? 'p' : (U ? 'u' : 's'),
5257                8 << size, dreg, nreg, mreg);
5258          return True;
5259       case 13:
5260          /* VQDMULL */
5261          if (U)
5262             return False;
5263          if (dreg & 1)
5264             return False;
5265          dreg >>= 1;
5266          size = B;
5267          switch (size) {
5268             case 0:
5269             case 3:
5270                return False;
5271             case 1:
5272                op = Iop_QDMull16Sx4;
5273                op2 = Iop_CmpEQ16x4;
5274                imm = 1LL << 15;
5275                imm = (imm << 16) | imm;
5276                imm = (imm << 32) | imm;
5277                break;
5278             case 2:
5279                op = Iop_QDMull32Sx2;
5280                op2 = Iop_CmpEQ32x2;
5281                imm = 1LL << 31;
5282                imm = (imm << 32) | imm;
5283                break;
5284             default:
5285                vassert(0);
5286          }
5287          putQReg(dreg, binop(op, getDRegI64(nreg), getDRegI64(mreg)),
5288                condT);
5289          setFlag_QC(binop(Iop_And64,
5290                           binop(op2, getDRegI64(nreg), mkU64(imm)),
5291                           binop(op2, getDRegI64(mreg), mkU64(imm))),
5292                     mkU64(0),
5293                     False, condT);
5294          DIP("vqdmull.s%d q%u, d%u, d%u\n", 8 << size, dreg, nreg, mreg);
5295          return True;
5296       default:
5297          return False;
5298    }
5299    return False;
5300 }
5301
5302 /* A7.4.3 Two registers and a scalar */
5303 static
5304 Bool dis_neon_data_2reg_and_scalar ( UInt theInstr, IRTemp condT )
5305 {
5306 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
5307    UInt U = INSN(24,24);
5308    UInt dreg = get_neon_d_regno(theInstr & ~(1 << 6));
5309    UInt nreg = get_neon_n_regno(theInstr & ~(1 << 6));
5310    UInt mreg = get_neon_m_regno(theInstr & ~(1 << 6));
5311    UInt size = INSN(21,20);
5312    UInt index;
5313    UInt Q = INSN(24,24);
5314
5315    if (INSN(27,25) != 1 || INSN(23,23) != 1
5316        || INSN(6,6) != 1 || INSN(4,4) != 0)
5317       return False;
5318
5319    /* VMLA, VMLS (scalar)  */
5320    if ((INSN(11,8) & BITS4(1,0,1,0)) == BITS4(0,0,0,0)) {
5321       IRTemp res, arg_m, arg_n;
5322       IROp dup, get, op, op2, add, sub;
5323       if (Q) {
5324          if ((dreg & 1) || (nreg & 1))
5325             return False;
5326          dreg >>= 1;
5327          nreg >>= 1;
5328          res = newTemp(Ity_V128);
5329          arg_m = newTemp(Ity_V128);
5330          arg_n = newTemp(Ity_V128);
5331          assign(arg_n, getQReg(nreg));
5332          switch(size) {
5333             case 1:
5334                dup = Iop_Dup16x8;
5335                get = Iop_GetElem16x4;
5336                index = mreg >> 3;
5337                mreg &= 7;
5338                break;
5339             case 2:
5340                dup = Iop_Dup32x4;
5341                get = Iop_GetElem32x2;
5342                index = mreg >> 4;
5343                mreg &= 0xf;
5344                break;
5345             case 0:
5346             case 3:
5347                return False;
5348             default:
5349                vassert(0);
5350          }
5351          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5352       } else {
5353          res = newTemp(Ity_I64);
5354          arg_m = newTemp(Ity_I64);
5355          arg_n = newTemp(Ity_I64);
5356          assign(arg_n, getDRegI64(nreg));
5357          switch(size) {
5358             case 1:
5359                dup = Iop_Dup16x4;
5360                get = Iop_GetElem16x4;
5361                index = mreg >> 3;
5362                mreg &= 7;
5363                break;
5364             case 2:
5365                dup = Iop_Dup32x2;
5366                get = Iop_GetElem32x2;
5367                index = mreg >> 4;
5368                mreg &= 0xf;
5369                break;
5370             case 0:
5371             case 3:
5372                return False;
5373             default:
5374                vassert(0);
5375          }
5376          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5377       }
5378       if (INSN(8,8)) {
5379          switch (size) {
5380             case 2:
5381                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5382                add = Q ? Iop_Add32Fx4 : Iop_Add32Fx2;
5383                sub = Q ? Iop_Sub32Fx4 : Iop_Sub32Fx2;
5384                break;
5385             case 0:
5386             case 1:
5387             case 3:
5388                return False;
5389             default:
5390                vassert(0);
5391          }
5392       } else {
5393          switch (size) {
5394             case 1:
5395                op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5396                add = Q ? Iop_Add16x8 : Iop_Add16x4;
5397                sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
5398                break;
5399             case 2:
5400                op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5401                add = Q ? Iop_Add32x4 : Iop_Add32x2;
5402                sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
5403                break;
5404             case 0:
5405             case 3:
5406                return False;
5407             default:
5408                vassert(0);
5409          }
5410       }
5411       op2 = INSN(10,10) ? sub : add;
5412       assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5413       if (Q)
5414          putQReg(dreg, binop_w_fake_RM(op2, getQReg(dreg), mkexpr(res)),
5415                  condT);
5416       else
5417          putDRegI64(dreg, binop(op2, getDRegI64(dreg), mkexpr(res)),
5418                     condT);
5419       DIP("vml%c.%c%d %c%u, %c%u, d%u[%u]\n", INSN(10,10) ? 's' : 'a',
5420             INSN(8,8) ? 'f' : 'i', 8 << size,
5421             Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', nreg, mreg, index);
5422       return True;
5423    }
5424
5425    /* VMLAL, VMLSL (scalar)   */
5426    if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,0)) {
5427       IRTemp res, arg_m, arg_n;
5428       IROp dup, get, op, op2, add, sub;
5429       if (dreg & 1)
5430          return False;
5431       dreg >>= 1;
5432       res = newTemp(Ity_V128);
5433       arg_m = newTemp(Ity_I64);
5434       arg_n = newTemp(Ity_I64);
5435       assign(arg_n, getDRegI64(nreg));
5436       switch(size) {
5437          case 1:
5438             dup = Iop_Dup16x4;
5439             get = Iop_GetElem16x4;
5440             index = mreg >> 3;
5441             mreg &= 7;
5442             break;
5443          case 2:
5444             dup = Iop_Dup32x2;
5445             get = Iop_GetElem32x2;
5446             index = mreg >> 4;
5447             mreg &= 0xf;
5448             break;
5449          case 0:
5450          case 3:
5451             return False;
5452          default:
5453             vassert(0);
5454       }
5455       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5456       switch (size) {
5457          case 1:
5458             op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4;
5459             add = Iop_Add32x4;
5460             sub = Iop_Sub32x4;
5461             break;
5462          case 2:
5463             op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2;
5464             add = Iop_Add64x2;
5465             sub = Iop_Sub64x2;
5466             break;
5467          case 0:
5468          case 3:
5469             return False;
5470          default:
5471             vassert(0);
5472       }
5473       op2 = INSN(10,10) ? sub : add;
5474       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5475       putQReg(dreg, binop(op2, getQReg(dreg), mkexpr(res)), condT);
5476       DIP("vml%cl.%c%d q%u, d%u, d%u[%u]\n",
5477           INSN(10,10) ? 's' : 'a', U ? 'u' : 's',
5478           8 << size, dreg, nreg, mreg, index);
5479       return True;
5480    }
5481
5482    /* VQDMLAL, VQDMLSL (scalar)  */
5483    if ((INSN(11,8) & BITS4(1,0,1,1)) == BITS4(0,0,1,1) && !U) {
5484       IRTemp res, arg_m, arg_n, tmp;
5485       IROp dup, get, op, op2, add, cmp;
5486       UInt P = INSN(10,10);
5487       ULong imm;
5488       if (dreg & 1)
5489          return False;
5490       dreg >>= 1;
5491       res = newTemp(Ity_V128);
5492       arg_m = newTemp(Ity_I64);
5493       arg_n = newTemp(Ity_I64);
5494       assign(arg_n, getDRegI64(nreg));
5495       switch(size) {
5496          case 1:
5497             dup = Iop_Dup16x4;
5498             get = Iop_GetElem16x4;
5499             index = mreg >> 3;
5500             mreg &= 7;
5501             break;
5502          case 2:
5503             dup = Iop_Dup32x2;
5504             get = Iop_GetElem32x2;
5505             index = mreg >> 4;
5506             mreg &= 0xf;
5507             break;
5508          case 0:
5509          case 3:
5510             return False;
5511          default:
5512             vassert(0);
5513       }
5514       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5515       switch (size) {
5516          case 0:
5517          case 3:
5518             return False;
5519          case 1:
5520             op = Iop_QDMull16Sx4;
5521             cmp = Iop_CmpEQ16x4;
5522             add = P ? Iop_QSub32Sx4 : Iop_QAdd32Sx4;
5523             op2 = P ? Iop_Sub32x4 : Iop_Add32x4;
5524             imm = 1LL << 15;
5525             imm = (imm << 16) | imm;
5526             imm = (imm << 32) | imm;
5527             break;
5528          case 2:
5529             op = Iop_QDMull32Sx2;
5530             cmp = Iop_CmpEQ32x2;
5531             add = P ? Iop_QSub64Sx2 : Iop_QAdd64Sx2;
5532             op2 = P ? Iop_Sub64x2 : Iop_Add64x2;
5533             imm = 1LL << 31;
5534             imm = (imm << 32) | imm;
5535             break;
5536          default:
5537             vassert(0);
5538       }
5539       res = newTemp(Ity_V128);
5540       tmp = newTemp(Ity_V128);
5541       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5542       assign(tmp, binop(op2, getQReg(dreg), mkexpr(res)));
5543       setFlag_QC(binop(Iop_And64,
5544                        binop(cmp, mkexpr(arg_n), mkU64(imm)),
5545                        binop(cmp, mkexpr(arg_m), mkU64(imm))),
5546                  mkU64(0),
5547                  False, condT);
5548       setFlag_QC(mkexpr(tmp), binop(add, getQReg(dreg), mkexpr(res)),
5549                  True, condT);
5550       putQReg(dreg, binop(add, getQReg(dreg), mkexpr(res)), condT);
5551       DIP("vqdml%cl.s%d q%u, d%u, d%u[%u]\n", P ? 's' : 'a', 8 << size,
5552           dreg, nreg, mreg, index);
5553       return True;
5554    }
5555
5556    /* VMUL (by scalar)  */
5557    if ((INSN(11,8) & BITS4(1,1,1,0)) == BITS4(1,0,0,0)) {
5558       IRTemp res, arg_m, arg_n;
5559       IROp dup, get, op;
5560       if (Q) {
5561          if ((dreg & 1) || (nreg & 1))
5562             return False;
5563          dreg >>= 1;
5564          nreg >>= 1;
5565          res = newTemp(Ity_V128);
5566          arg_m = newTemp(Ity_V128);
5567          arg_n = newTemp(Ity_V128);
5568          assign(arg_n, getQReg(nreg));
5569          switch(size) {
5570             case 1:
5571                dup = Iop_Dup16x8;
5572                get = Iop_GetElem16x4;
5573                index = mreg >> 3;
5574                mreg &= 7;
5575                break;
5576             case 2:
5577                dup = Iop_Dup32x4;
5578                get = Iop_GetElem32x2;
5579                index = mreg >> 4;
5580                mreg &= 0xf;
5581                break;
5582             case 0:
5583             case 3:
5584                return False;
5585             default:
5586                vassert(0);
5587          }
5588          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5589       } else {
5590          res = newTemp(Ity_I64);
5591          arg_m = newTemp(Ity_I64);
5592          arg_n = newTemp(Ity_I64);
5593          assign(arg_n, getDRegI64(nreg));
5594          switch(size) {
5595             case 1:
5596                dup = Iop_Dup16x4;
5597                get = Iop_GetElem16x4;
5598                index = mreg >> 3;
5599                mreg &= 7;
5600                break;
5601             case 2:
5602                dup = Iop_Dup32x2;
5603                get = Iop_GetElem32x2;
5604                index = mreg >> 4;
5605                mreg &= 0xf;
5606                break;
5607             case 0:
5608             case 3:
5609                return False;
5610             default:
5611                vassert(0);
5612          }
5613          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5614       }
5615       if (INSN(8,8)) {
5616          switch (size) {
5617             case 2:
5618                op = Q ? Iop_Mul32Fx4 : Iop_Mul32Fx2;
5619                break;
5620             case 0:
5621             case 1:
5622             case 3:
5623                return False;
5624             default:
5625                vassert(0);
5626          }
5627       } else {
5628          switch (size) {
5629             case 1:
5630                op = Q ? Iop_Mul16x8 : Iop_Mul16x4;
5631                break;
5632             case 2:
5633                op = Q ? Iop_Mul32x4 : Iop_Mul32x2;
5634                break;
5635             case 0:
5636             case 3:
5637                return False;
5638             default:
5639                vassert(0);
5640          }
5641       }
5642       assign(res, binop_w_fake_RM(op, mkexpr(arg_n), mkexpr(arg_m)));
5643       if (Q)
5644          putQReg(dreg, mkexpr(res), condT);
5645       else
5646          putDRegI64(dreg, mkexpr(res), condT);
5647       DIP("vmul.%c%d %c%u, %c%u, d%u[%u]\n", INSN(8,8) ? 'f' : 'i',
5648           8 << size, Q ? 'q' : 'd', dreg,
5649           Q ? 'q' : 'd', nreg, mreg, index);
5650       return True;
5651    }
5652
5653    /* VMULL (scalar) */
5654    if (INSN(11,8) == BITS4(1,0,1,0)) {
5655       IRTemp res, arg_m, arg_n;
5656       IROp dup, get, op;
5657       if (dreg & 1)
5658          return False;
5659       dreg >>= 1;
5660       res = newTemp(Ity_V128);
5661       arg_m = newTemp(Ity_I64);
5662       arg_n = newTemp(Ity_I64);
5663       assign(arg_n, getDRegI64(nreg));
5664       switch(size) {
5665          case 1:
5666             dup = Iop_Dup16x4;
5667             get = Iop_GetElem16x4;
5668             index = mreg >> 3;
5669             mreg &= 7;
5670             break;
5671          case 2:
5672             dup = Iop_Dup32x2;
5673             get = Iop_GetElem32x2;
5674             index = mreg >> 4;
5675             mreg &= 0xf;
5676             break;
5677          case 0:
5678          case 3:
5679             return False;
5680          default:
5681             vassert(0);
5682       }
5683       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5684       switch (size) {
5685          case 1: op = U ? Iop_Mull16Ux4 : Iop_Mull16Sx4; break;
5686          case 2: op = U ? Iop_Mull32Ux2 : Iop_Mull32Sx2; break;
5687          case 0: case 3: return False;
5688          default: vassert(0);
5689       }
5690       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5691       putQReg(dreg, mkexpr(res), condT);
5692       DIP("vmull.%c%d q%u, d%u, d%u[%u]\n", U ? 'u' : 's', 8 << size, dreg,
5693           nreg, mreg, index);
5694       return True;
5695    }
5696
5697    /* VQDMULL */
5698    if (INSN(11,8) == BITS4(1,0,1,1) && !U) {
5699       IROp op ,op2, dup, get;
5700       ULong imm;
5701       IRTemp arg_m, arg_n;
5702       if (dreg & 1)
5703          return False;
5704       dreg >>= 1;
5705       arg_m = newTemp(Ity_I64);
5706       arg_n = newTemp(Ity_I64);
5707       assign(arg_n, getDRegI64(nreg));
5708       switch(size) {
5709          case 1:
5710             dup = Iop_Dup16x4;
5711             get = Iop_GetElem16x4;
5712             index = mreg >> 3;
5713             mreg &= 7;
5714             break;
5715          case 2:
5716             dup = Iop_Dup32x2;
5717             get = Iop_GetElem32x2;
5718             index = mreg >> 4;
5719             mreg &= 0xf;
5720             break;
5721          case 0:
5722          case 3:
5723             return False;
5724          default:
5725             vassert(0);
5726       }
5727       assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5728       switch (size) {
5729          case 0:
5730          case 3:
5731             return False;
5732          case 1:
5733             op = Iop_QDMull16Sx4;
5734             op2 = Iop_CmpEQ16x4;
5735             imm = 1LL << 15;
5736             imm = (imm << 16) | imm;
5737             imm = (imm << 32) | imm;
5738             break;
5739          case 2:
5740             op = Iop_QDMull32Sx2;
5741             op2 = Iop_CmpEQ32x2;
5742             imm = 1LL << 31;
5743             imm = (imm << 32) | imm;
5744             break;
5745          default:
5746             vassert(0);
5747       }
5748       putQReg(dreg, binop(op, mkexpr(arg_n), mkexpr(arg_m)),
5749             condT);
5750       setFlag_QC(binop(Iop_And64,
5751                        binop(op2, mkexpr(arg_n), mkU64(imm)),
5752                        binop(op2, mkexpr(arg_m), mkU64(imm))),
5753                  mkU64(0),
5754                  False, condT);
5755       DIP("vqdmull.s%d q%u, d%u, d%u[%u]\n", 8 << size, dreg, nreg, mreg,
5756           index);
5757       return True;
5758    }
5759
5760    /* VQDMULH */
5761    if (INSN(11,8) == BITS4(1,1,0,0)) {
5762       IROp op ,op2, dup, get;
5763       ULong imm;
5764       IRTemp res, arg_m, arg_n;
5765       if (Q) {
5766          if ((dreg & 1) || (nreg & 1))
5767             return False;
5768          dreg >>= 1;
5769          nreg >>= 1;
5770          res = newTemp(Ity_V128);
5771          arg_m = newTemp(Ity_V128);
5772          arg_n = newTemp(Ity_V128);
5773          assign(arg_n, getQReg(nreg));
5774          switch(size) {
5775             case 1:
5776                dup = Iop_Dup16x8;
5777                get = Iop_GetElem16x4;
5778                index = mreg >> 3;
5779                mreg &= 7;
5780                break;
5781             case 2:
5782                dup = Iop_Dup32x4;
5783                get = Iop_GetElem32x2;
5784                index = mreg >> 4;
5785                mreg &= 0xf;
5786                break;
5787             case 0:
5788             case 3:
5789                return False;
5790             default:
5791                vassert(0);
5792          }
5793          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5794       } else {
5795          res = newTemp(Ity_I64);
5796          arg_m = newTemp(Ity_I64);
5797          arg_n = newTemp(Ity_I64);
5798          assign(arg_n, getDRegI64(nreg));
5799          switch(size) {
5800             case 1:
5801                dup = Iop_Dup16x4;
5802                get = Iop_GetElem16x4;
5803                index = mreg >> 3;
5804                mreg &= 7;
5805                break;
5806             case 2:
5807                dup = Iop_Dup32x2;
5808                get = Iop_GetElem32x2;
5809                index = mreg >> 4;
5810                mreg &= 0xf;
5811                break;
5812             case 0:
5813             case 3:
5814                return False;
5815             default:
5816                vassert(0);
5817          }
5818          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5819       }
5820       switch (size) {
5821          case 0:
5822          case 3:
5823             return False;
5824          case 1:
5825             op = Q ? Iop_QDMulHi16Sx8 : Iop_QDMulHi16Sx4;
5826             op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5827             imm = 1LL << 15;
5828             imm = (imm << 16) | imm;
5829             imm = (imm << 32) | imm;
5830             break;
5831          case 2:
5832             op = Q ? Iop_QDMulHi32Sx4 : Iop_QDMulHi32Sx2;
5833             op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5834             imm = 1LL << 31;
5835             imm = (imm << 32) | imm;
5836             break;
5837          default:
5838             vassert(0);
5839       }
5840       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5841       setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5842                        binop(op2, mkexpr(arg_n),
5843                                   Q ? mkU128(imm) : mkU64(imm)),
5844                        binop(op2, mkexpr(arg_m),
5845                              Q ? mkU128(imm) : mkU64(imm))),
5846                  Q ? mkU128(0) : mkU64(0),
5847                  Q, condT);
5848       if (Q)
5849          putQReg(dreg, mkexpr(res), condT);
5850       else
5851          putDRegI64(dreg, mkexpr(res), condT);
5852       DIP("vqdmulh.s%d %c%u, %c%u, d%u[%u]\n",
5853           8 << size, Q ? 'q' : 'd', dreg,
5854           Q ? 'q' : 'd', nreg, mreg, index);
5855       return True;
5856    }
5857
5858    /* VQRDMULH (scalar) */
5859    if (INSN(11,8) == BITS4(1,1,0,1)) {
5860       IROp op ,op2, dup, get;
5861       ULong imm;
5862       IRTemp res, arg_m, arg_n;
5863       if (Q) {
5864          if ((dreg & 1) || (nreg & 1))
5865             return False;
5866          dreg >>= 1;
5867          nreg >>= 1;
5868          res = newTemp(Ity_V128);
5869          arg_m = newTemp(Ity_V128);
5870          arg_n = newTemp(Ity_V128);
5871          assign(arg_n, getQReg(nreg));
5872          switch(size) {
5873             case 1:
5874                dup = Iop_Dup16x8;
5875                get = Iop_GetElem16x4;
5876                index = mreg >> 3;
5877                mreg &= 7;
5878                break;
5879             case 2:
5880                dup = Iop_Dup32x4;
5881                get = Iop_GetElem32x2;
5882                index = mreg >> 4;
5883                mreg &= 0xf;
5884                break;
5885             case 0:
5886             case 3:
5887                return False;
5888             default:
5889                vassert(0);
5890          }
5891          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5892       } else {
5893          res = newTemp(Ity_I64);
5894          arg_m = newTemp(Ity_I64);
5895          arg_n = newTemp(Ity_I64);
5896          assign(arg_n, getDRegI64(nreg));
5897          switch(size) {
5898             case 1:
5899                dup = Iop_Dup16x4;
5900                get = Iop_GetElem16x4;
5901                index = mreg >> 3;
5902                mreg &= 7;
5903                break;
5904             case 2:
5905                dup = Iop_Dup32x2;
5906                get = Iop_GetElem32x2;
5907                index = mreg >> 4;
5908                mreg &= 0xf;
5909                break;
5910             case 0:
5911             case 3:
5912                return False;
5913             default:
5914                vassert(0);
5915          }
5916          assign(arg_m, unop(dup, binop(get, getDRegI64(mreg), mkU8(index))));
5917       }
5918       switch (size) {
5919          case 0:
5920          case 3:
5921             return False;
5922          case 1:
5923             op = Q ? Iop_QRDMulHi16Sx8 : Iop_QRDMulHi16Sx4;
5924             op2 = Q ? Iop_CmpEQ16x8 : Iop_CmpEQ16x4;
5925             imm = 1LL << 15;
5926             imm = (imm << 16) | imm;
5927             imm = (imm << 32) | imm;
5928             break;
5929          case 2:
5930             op = Q ? Iop_QRDMulHi32Sx4 : Iop_QRDMulHi32Sx2;
5931             op2 = Q ? Iop_CmpEQ32x4 : Iop_CmpEQ32x2;
5932             imm = 1LL << 31;
5933             imm = (imm << 32) | imm;
5934             break;
5935          default:
5936             vassert(0);
5937       }
5938       assign(res, binop(op, mkexpr(arg_n), mkexpr(arg_m)));
5939       setFlag_QC(binop(Q ? Iop_AndV128 : Iop_And64,
5940                        binop(op2, mkexpr(arg_n),
5941                                   Q ? mkU128(imm) : mkU64(imm)),
5942                        binop(op2, mkexpr(arg_m),
5943                                   Q ? mkU128(imm) : mkU64(imm))),
5944                  Q ? mkU128(0) : mkU64(0),
5945                  Q, condT);
5946       if (Q)
5947          putQReg(dreg, mkexpr(res), condT);
5948       else
5949          putDRegI64(dreg, mkexpr(res), condT);
5950       DIP("vqrdmulh.s%d %c%u, %c%u, d%u[%u]\n",
5951           8 << size, Q ? 'q' : 'd', dreg,
5952           Q ? 'q' : 'd', nreg, mreg, index);
5953       return True;
5954    }
5955
5956    return False;
5957 #  undef INSN
5958 }
5959
5960 /* A7.4.4 Two registers and a shift amount */
5961 static
5962 Bool dis_neon_data_2reg_and_shift ( UInt theInstr, IRTemp condT )
5963 {
5964    UInt A = (theInstr >> 8) & 0xf;
5965    UInt B = (theInstr >> 6) & 1;
5966    UInt L = (theInstr >> 7) & 1;
5967    UInt U = (theInstr >> 24) & 1;
5968    UInt Q = B;
5969    UInt imm6 = (theInstr >> 16) & 0x3f;
5970    UInt shift_imm;
5971    UInt size = 4;
5972    UInt tmp;
5973    UInt mreg = get_neon_m_regno(theInstr);
5974    UInt dreg = get_neon_d_regno(theInstr);
5975    ULong imm = 0;
5976    IROp op, cvt, add = Iop_INVALID, cvt2, op_rev;
5977    IRTemp reg_m, res, mask;
5978
5979    if (L == 0 && ((theInstr >> 19) & 7) == 0)
5980       /* It is one reg and immediate */
5981       return False;
5982
5983    tmp = (L << 6) | imm6;
5984    if (tmp & 0x40) {
5985       size = 3;
5986       shift_imm = 64 - imm6;
5987    } else if (tmp & 0x20) {
5988       size = 2;
5989       shift_imm = 64 - imm6;
5990    } else if (tmp & 0x10) {
5991       size = 1;
5992       shift_imm = 32 - imm6;
5993    } else if (tmp & 0x8) {
5994       size = 0;
5995       shift_imm = 16 - imm6;
5996    } else {
5997       return False;
5998    }
5999
6000    switch (A) {
6001       case 3:
6002       case 2:
6003          /* VRSHR, VRSRA */
6004          if (shift_imm > 0) {
6005             IRExpr *imm_val;
6006             imm = 1L;
6007             switch (size) {
6008                case 0:
6009                   imm = (imm << 8) | imm;
6010                   /* fall through */
6011                case 1:
6012                   imm = (imm << 16) | imm;
6013                   /* fall through */
6014                case 2:
6015                   imm = (imm << 32) | imm;
6016                   /* fall through */
6017                case 3:
6018                   break;
6019                default:
6020                   vassert(0);
6021             }
6022             if (Q) {
6023                reg_m = newTemp(Ity_V128);
6024                res = newTemp(Ity_V128);
6025                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6026                assign(reg_m, getQReg(mreg));
6027                switch (size) {
6028                   case 0:
6029                      add = Iop_Add8x16;
6030                      op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6031                      break;
6032                   case 1:
6033                      add = Iop_Add16x8;
6034                      op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6035                      break;
6036                   case 2:
6037                      add = Iop_Add32x4;
6038                      op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6039                      break;
6040                   case 3:
6041                      add = Iop_Add64x2;
6042                      op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6043                      break;
6044                   default:
6045                      vassert(0);
6046                }
6047             } else {
6048                reg_m = newTemp(Ity_I64);
6049                res = newTemp(Ity_I64);
6050                imm_val = mkU64(imm);
6051                assign(reg_m, getDRegI64(mreg));
6052                switch (size) {
6053                   case 0:
6054                      add = Iop_Add8x8;
6055                      op = U ? Iop_ShrN8x8 : Iop_SarN8x8;
6056                      break;
6057                   case 1:
6058                      add = Iop_Add16x4;
6059                      op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6060                      break;
6061                   case 2:
6062                      add = Iop_Add32x2;
6063                      op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6064                      break;
6065                   case 3:
6066                      add = Iop_Add64;
6067                      op = U ? Iop_Shr64 : Iop_Sar64;
6068                      break;
6069                   default:
6070                      vassert(0);
6071                }
6072             }
6073             assign(res,
6074                    binop(add,
6075                          binop(op,
6076                                mkexpr(reg_m),
6077                                mkU8(shift_imm)),
6078                          binop(Q ? Iop_AndV128 : Iop_And64,
6079                                binop(op,
6080                                      mkexpr(reg_m),
6081                                      mkU8(shift_imm - 1)),
6082                                imm_val)));
6083          } else {
6084             if (Q) {
6085                res = newTemp(Ity_V128);
6086                assign(res, getQReg(mreg));
6087             } else {
6088                res = newTemp(Ity_I64);
6089                assign(res, getDRegI64(mreg));
6090             }
6091          }
6092          if (A == 3) {
6093             if (Q) {
6094                putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6095                              condT);
6096             } else {
6097                putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6098                                 condT);
6099             }
6100             DIP("vrsra.%c%d %c%u, %c%u, #%u\n",
6101                 U ? 'u' : 's', 8 << size,
6102                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6103          } else {
6104             if (Q) {
6105                putQReg(dreg, mkexpr(res), condT);
6106             } else {
6107                putDRegI64(dreg, mkexpr(res), condT);
6108             }
6109             DIP("vrshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6110                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6111          }
6112          return True;
6113       case 1:
6114       case 0:
6115          /* VSHR, VSRA */
6116          if (Q) {
6117             reg_m = newTemp(Ity_V128);
6118             assign(reg_m, getQReg(mreg));
6119             res = newTemp(Ity_V128);
6120          } else {
6121             reg_m = newTemp(Ity_I64);
6122             assign(reg_m, getDRegI64(mreg));
6123             res = newTemp(Ity_I64);
6124          }
6125          if (Q) {
6126             switch (size) {
6127                case 0:
6128                   op = U ? Iop_ShrN8x16 : Iop_SarN8x16;
6129                   add = Iop_Add8x16;
6130                   break;
6131                case 1:
6132                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6133                   add = Iop_Add16x8;
6134                   break;
6135                case 2:
6136                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6137                   add = Iop_Add32x4;
6138                   break;
6139                case 3:
6140                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6141                   add = Iop_Add64x2;
6142                   break;
6143                default:
6144                   vassert(0);
6145             }
6146          } else {
6147             switch (size) {
6148                case 0:
6149                   op =  U ? Iop_ShrN8x8 : Iop_SarN8x8;
6150                   add = Iop_Add8x8;
6151                   break;
6152                case 1:
6153                   op = U ? Iop_ShrN16x4 : Iop_SarN16x4;
6154                   add = Iop_Add16x4;
6155                   break;
6156                case 2:
6157                   op = U ? Iop_ShrN32x2 : Iop_SarN32x2;
6158                   add = Iop_Add32x2;
6159                   break;
6160                case 3:
6161                   op = U ? Iop_Shr64 : Iop_Sar64;
6162                   add = Iop_Add64;
6163                   break;
6164                default:
6165                   vassert(0);
6166             }
6167          }
6168          assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6169          if (A == 1) {
6170             if (Q) {
6171                putQReg(dreg, binop(add, mkexpr(res), getQReg(dreg)),
6172                              condT);
6173             } else {
6174                putDRegI64(dreg, binop(add, mkexpr(res), getDRegI64(dreg)),
6175                                 condT);
6176             }
6177             DIP("vsra.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6178                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6179          } else {
6180             if (Q) {
6181                putQReg(dreg, mkexpr(res), condT);
6182             } else {
6183                putDRegI64(dreg, mkexpr(res), condT);
6184             }
6185             DIP("vshr.%c%d %c%u, %c%u, #%u\n", U ? 'u' : 's', 8 << size,
6186                   Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6187          }
6188          return True;
6189       case 4:
6190          /* VSRI */
6191          if (!U)
6192             return False;
6193          if (Q) {
6194             res = newTemp(Ity_V128);
6195             mask = newTemp(Ity_V128);
6196          } else {
6197             res = newTemp(Ity_I64);
6198             mask = newTemp(Ity_I64);
6199          }
6200          switch (size) {
6201             case 0: op = Q ? Iop_ShrN8x16 : Iop_ShrN8x8; break;
6202             case 1: op = Q ? Iop_ShrN16x8 : Iop_ShrN16x4; break;
6203             case 2: op = Q ? Iop_ShrN32x4 : Iop_ShrN32x2; break;
6204             case 3: op = Q ? Iop_ShrN64x2 : Iop_Shr64; break;
6205             default: vassert(0);
6206          }
6207          if (Q) {
6208             assign(mask, binop(op, binop(Iop_64HLtoV128,
6209                                          mkU64(0xFFFFFFFFFFFFFFFFLL),
6210                                          mkU64(0xFFFFFFFFFFFFFFFFLL)),
6211                                mkU8(shift_imm)));
6212             assign(res, binop(Iop_OrV128,
6213                               binop(Iop_AndV128,
6214                                     getQReg(dreg),
6215                                     unop(Iop_NotV128,
6216                                          mkexpr(mask))),
6217                               binop(op,
6218                                     getQReg(mreg),
6219                                     mkU8(shift_imm))));
6220             putQReg(dreg, mkexpr(res), condT);
6221          } else {
6222             assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6223                                mkU8(shift_imm)));
6224             assign(res, binop(Iop_Or64,
6225                               binop(Iop_And64,
6226                                     getDRegI64(dreg),
6227                                     unop(Iop_Not64,
6228                                          mkexpr(mask))),
6229                               binop(op,
6230                                     getDRegI64(mreg),
6231                                     mkU8(shift_imm))));
6232             putDRegI64(dreg, mkexpr(res), condT);
6233          }
6234          DIP("vsri.%d %c%u, %c%u, #%u\n",
6235              8 << size, Q ? 'q' : 'd', dreg,
6236              Q ? 'q' : 'd', mreg, shift_imm);
6237          return True;
6238       case 5:
6239          if (U) {
6240             /* VSLI */
6241             shift_imm = 8 * (1 << size) - shift_imm;
6242             if (Q) {
6243                res = newTemp(Ity_V128);
6244                mask = newTemp(Ity_V128);
6245             } else {
6246                res = newTemp(Ity_I64);
6247                mask = newTemp(Ity_I64);
6248             }
6249             switch (size) {
6250                case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6251                case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6252                case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6253                case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6254                default: vassert(0);
6255             }
6256             if (Q) {
6257                assign(mask, binop(op, binop(Iop_64HLtoV128,
6258                                             mkU64(0xFFFFFFFFFFFFFFFFLL),
6259                                             mkU64(0xFFFFFFFFFFFFFFFFLL)),
6260                                   mkU8(shift_imm)));
6261                assign(res, binop(Iop_OrV128,
6262                                  binop(Iop_AndV128,
6263                                        getQReg(dreg),
6264                                        unop(Iop_NotV128,
6265                                             mkexpr(mask))),
6266                                  binop(op,
6267                                        getQReg(mreg),
6268                                        mkU8(shift_imm))));
6269                putQReg(dreg, mkexpr(res), condT);
6270             } else {
6271                assign(mask, binop(op, mkU64(0xFFFFFFFFFFFFFFFFLL),
6272                                   mkU8(shift_imm)));
6273                assign(res, binop(Iop_Or64,
6274                                  binop(Iop_And64,
6275                                        getDRegI64(dreg),
6276                                        unop(Iop_Not64,
6277                                             mkexpr(mask))),
6278                                  binop(op,
6279                                        getDRegI64(mreg),
6280                                        mkU8(shift_imm))));
6281                putDRegI64(dreg, mkexpr(res), condT);
6282             }
6283             DIP("vsli.%d %c%u, %c%u, #%u\n",
6284                 8 << size, Q ? 'q' : 'd', dreg,
6285                 Q ? 'q' : 'd', mreg, shift_imm);
6286             return True;
6287          } else {
6288             /* VSHL #imm */
6289             shift_imm = 8 * (1 << size) - shift_imm;
6290             if (Q) {
6291                res = newTemp(Ity_V128);
6292             } else {
6293                res = newTemp(Ity_I64);
6294             }
6295             switch (size) {
6296                case 0: op = Q ? Iop_ShlN8x16 : Iop_ShlN8x8; break;
6297                case 1: op = Q ? Iop_ShlN16x8 : Iop_ShlN16x4; break;
6298                case 2: op = Q ? Iop_ShlN32x4 : Iop_ShlN32x2; break;
6299                case 3: op = Q ? Iop_ShlN64x2 : Iop_Shl64; break;
6300                default: vassert(0);
6301             }
6302             assign(res, binop(op, Q ? getQReg(mreg) : getDRegI64(mreg),
6303                      mkU8(shift_imm)));
6304             if (Q) {
6305                putQReg(dreg, mkexpr(res), condT);
6306             } else {
6307                putDRegI64(dreg, mkexpr(res), condT);
6308             }
6309             DIP("vshl.i%d %c%u, %c%u, #%u\n",
6310                 8 << size, Q ? 'q' : 'd', dreg,
6311                 Q ? 'q' : 'd', mreg, shift_imm);
6312             return True;
6313          }
6314          break;
6315       case 6:
6316       case 7:
6317          /* VQSHL, VQSHLU */
6318          shift_imm = 8 * (1 << size) - shift_imm;
6319          if (U) {
6320             if (A & 1) {
6321                switch (size) {
6322                   case 0:
6323                      op = Q ? Iop_QShlNsatUU8x16 : Iop_QShlNsatUU8x8;
6324                      op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6325                      break;
6326                   case 1:
6327                      op = Q ? Iop_QShlNsatUU16x8 : Iop_QShlNsatUU16x4;
6328                      op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6329                      break;
6330                   case 2:
6331                      op = Q ? Iop_QShlNsatUU32x4 : Iop_QShlNsatUU32x2;
6332                      op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6333                      break;
6334                   case 3:
6335                      op = Q ? Iop_QShlNsatUU64x2 : Iop_QShlNsatUU64x1;
6336                      op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6337                      break;
6338                   default:
6339                      vassert(0);
6340                }
6341                DIP("vqshl.u%d %c%u, %c%u, #%u\n",
6342                    8 << size,
6343                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6344             } else {
6345                switch (size) {
6346                   case 0:
6347                      op = Q ? Iop_QShlNsatSU8x16 : Iop_QShlNsatSU8x8;
6348                      op_rev = Q ? Iop_ShrN8x16 : Iop_ShrN8x8;
6349                      break;
6350                   case 1:
6351                      op = Q ? Iop_QShlNsatSU16x8 : Iop_QShlNsatSU16x4;
6352                      op_rev = Q ? Iop_ShrN16x8 : Iop_ShrN16x4;
6353                      break;
6354                   case 2:
6355                      op = Q ? Iop_QShlNsatSU32x4 : Iop_QShlNsatSU32x2;
6356                      op_rev = Q ? Iop_ShrN32x4 : Iop_ShrN32x2;
6357                      break;
6358                   case 3:
6359                      op = Q ? Iop_QShlNsatSU64x2 : Iop_QShlNsatSU64x1;
6360                      op_rev = Q ? Iop_ShrN64x2 : Iop_Shr64;
6361                      break;
6362                   default:
6363                      vassert(0);
6364                }
6365                DIP("vqshlu.s%d %c%u, %c%u, #%u\n",
6366                    8 << size,
6367                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6368             }
6369          } else {
6370             if (!(A & 1))
6371                return False;
6372             switch (size) {
6373                case 0:
6374                   op = Q ? Iop_QShlNsatSS8x16 : Iop_QShlNsatSS8x8;
6375                   op_rev = Q ? Iop_SarN8x16 : Iop_SarN8x8;
6376                   break;
6377                case 1:
6378                   op = Q ? Iop_QShlNsatSS16x8 : Iop_QShlNsatSS16x4;
6379                   op_rev = Q ? Iop_SarN16x8 : Iop_SarN16x4;
6380                   break;
6381                case 2:
6382                   op = Q ? Iop_QShlNsatSS32x4 : Iop_QShlNsatSS32x2;
6383                   op_rev = Q ? Iop_SarN32x4 : Iop_SarN32x2;
6384                   break;
6385                case 3:
6386                   op = Q ? Iop_QShlNsatSS64x2 : Iop_QShlNsatSS64x1;
6387                   op_rev = Q ? Iop_SarN64x2 : Iop_Sar64;
6388                   break;
6389                default:
6390                   vassert(0);
6391             }
6392             DIP("vqshl.s%d %c%u, %c%u, #%u\n",
6393                 8 << size,
6394                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg, shift_imm);
6395          }
6396          if (Q) {
6397             tmp = newTemp(Ity_V128);
6398             res = newTemp(Ity_V128);
6399             reg_m = newTemp(Ity_V128);
6400             assign(reg_m, getQReg(mreg));
6401          } else {
6402             tmp = newTemp(Ity_I64);
6403             res = newTemp(Ity_I64);
6404             reg_m = newTemp(Ity_I64);
6405             assign(reg_m, getDRegI64(mreg));
6406          }
6407          assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6408          assign(tmp, binop(op_rev, mkexpr(res), mkU8(shift_imm)));
6409          setFlag_QC(mkexpr(tmp), mkexpr(reg_m), Q, condT);
6410          if (Q)
6411             putQReg(dreg, mkexpr(res), condT);
6412          else
6413             putDRegI64(dreg, mkexpr(res), condT);
6414          return True;
6415       case 8:
6416          if (!U) {
6417             if (L == 1)
6418                return False;
6419             size++;
6420             dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6421             mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
6422             if (mreg & 1)
6423                return False;
6424             mreg >>= 1;
6425             if (!B) {
6426                /* VSHRN*/
6427                IROp narOp;
6428                reg_m = newTemp(Ity_V128);
6429                assign(reg_m, getQReg(mreg));
6430                res = newTemp(Ity_I64);
6431                switch (size) {
6432                   case 1:
6433                      op = Iop_ShrN16x8;
6434                      narOp = Iop_NarrowUn16to8x8;
6435                      break;
6436                   case 2:
6437                      op = Iop_ShrN32x4;
6438                      narOp = Iop_NarrowUn32to16x4;
6439                      break;
6440                   case 3:
6441                      op = Iop_ShrN64x2;
6442                      narOp = Iop_NarrowUn64to32x2;
6443                      break;
6444                   default:
6445                      vassert(0);
6446                }
6447                assign(res, unop(narOp,
6448                                 binop(op,
6449                                       mkexpr(reg_m),
6450                                       mkU8(shift_imm))));
6451                putDRegI64(dreg, mkexpr(res), condT);
6452                DIP("vshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6453                    shift_imm);
6454                return True;
6455             } else {
6456                /* VRSHRN   */
6457                IROp addOp, shOp, narOp;
6458                IRExpr *imm_val;
6459                reg_m = newTemp(Ity_V128);
6460                assign(reg_m, getQReg(mreg));
6461                res = newTemp(Ity_I64);
6462                imm = 1L;
6463                switch (size) {
6464                   case 0: imm = (imm <<  8) | imm; /* fall through */
6465                   case 1: imm = (imm << 16) | imm; /* fall through */
6466                   case 2: imm = (imm << 32) | imm; /* fall through */
6467                   case 3: break;
6468                   default: vassert(0);
6469                }
6470                imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
6471                switch (size) {
6472                   case 1:
6473                      addOp = Iop_Add16x8;
6474                      shOp = Iop_ShrN16x8;
6475                      narOp = Iop_NarrowUn16to8x8;
6476                      break;
6477                   case 2:
6478                      addOp = Iop_Add32x4;
6479                      shOp = Iop_ShrN32x4;
6480                      narOp = Iop_NarrowUn32to16x4;
6481                      break;
6482                   case 3:
6483                      addOp = Iop_Add64x2;
6484                      shOp = Iop_ShrN64x2;
6485                      narOp = Iop_NarrowUn64to32x2;
6486                      break;
6487                   default:
6488                      vassert(0);
6489                }
6490                assign(res, unop(narOp,
6491                                 binop(addOp,
6492                                       binop(shOp,
6493                                             mkexpr(reg_m),
6494                                             mkU8(shift_imm)),
6495                                       binop(Iop_AndV128,
6496                                             binop(shOp,
6497                                                   mkexpr(reg_m),
6498                                                   mkU8(shift_imm - 1)),
6499                                             imm_val))));
6500                putDRegI64(dreg, mkexpr(res), condT);
6501                if (shift_imm == 0) {
6502                   DIP("vmov%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6503                       shift_imm);
6504                } else {
6505                   DIP("vrshrn.i%d d%u, q%u, #%u\n", 8 << size, dreg, mreg,
6506                       shift_imm);
6507                }
6508                return True;
6509             }
6510          }
6511          /* else fall through */
6512       case 9:
6513          dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
6514          mreg = ((theInstr >>  1) & 0x10) | (theInstr & 0xF);
6515          if (mreg & 1)
6516             return False;
6517          mreg >>= 1;
6518          size++;
6519          if ((theInstr >> 8) & 1) {
6520             switch (size) {
6521                case 1:
6522                   op = U ? Iop_ShrN16x8 : Iop_SarN16x8;
6523                   cvt = U ? Iop_QNarrowUn16Uto8Ux8 : Iop_QNarrowUn16Sto8Sx8;
6524                   cvt2 = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6525                   break;
6526                case 2:
6527                   op = U ? Iop_ShrN32x4 : Iop_SarN32x4;
6528                   cvt = U ? Iop_QNarrowUn32Uto16Ux4 : Iop_QNarrowUn32Sto16Sx4;
6529                   cvt2 = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6530                   break;
6531                case 3:
6532                   op = U ? Iop_ShrN64x2 : Iop_SarN64x2;
6533                   cvt = U ? Iop_QNarrowUn64Uto32Ux2 : Iop_QNarrowUn64Sto32Sx2;
6534                   cvt2 = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6535                   break;
6536                default:
6537                   vassert(0);
6538             }
6539             DIP("vq%sshrn.%c%d d%u, q%u, #%u\n", B ? "r" : "",
6540                 U ? 'u' : 's', 8 << size, dreg, mreg, shift_imm);
6541          } else {
6542             vassert(U);
6543             switch (size) {
6544                case 1:
6545                   op = Iop_SarN16x8;
6546                   cvt = Iop_QNarrowUn16Sto8Ux8;
6547                   cvt2 = Iop_Widen8Uto16x8;
6548                   break;
6549                case 2:
6550                   op = Iop_SarN32x4;
6551                   cvt = Iop_QNarrowUn32Sto16Ux4;
6552                   cvt2 = Iop_Widen16Uto32x4;
6553                   break;
6554                case 3:
6555                   op = Iop_SarN64x2;
6556                   cvt = Iop_QNarrowUn64Sto32Ux2;
6557                   cvt2 = Iop_Widen32Uto64x2;
6558                   break;
6559                default:
6560                   vassert(0);
6561             }
6562             DIP("vq%sshrun.s%d d%u, q%u, #%u\n", B ? "r" : "",
6563                 8 << size, dreg, mreg, shift_imm);
6564          }
6565          if (B) {
6566             if (shift_imm > 0) {
6567                imm = 1;
6568                switch (size) {
6569                   case 1: imm = (imm << 16) | imm; /* fall through */
6570                   case 2: imm = (imm << 32) | imm; /* fall through */
6571                   case 3: break;
6572                   case 0: default: vassert(0);
6573                }
6574                switch (size) {
6575                   case 1: add = Iop_Add16x8; break;
6576                   case 2: add = Iop_Add32x4; break;
6577                   case 3: add = Iop_Add64x2; break;
6578                   case 0: default: vassert(0);
6579                }
6580             }
6581          }
6582          reg_m = newTemp(Ity_V128);
6583          res = newTemp(Ity_V128);
6584          assign(reg_m, getQReg(mreg));
6585          if (B) {
6586             /* VQRSHRN, VQRSHRUN */
6587             assign(res, binop(add,
6588                               binop(op, mkexpr(reg_m), mkU8(shift_imm)),
6589                               binop(Iop_AndV128,
6590                                     binop(op,
6591                                           mkexpr(reg_m),
6592                                           mkU8(shift_imm - 1)),
6593                                     mkU128(imm))));
6594          } else {
6595             /* VQSHRN, VQSHRUN */
6596             assign(res, binop(op, mkexpr(reg_m), mkU8(shift_imm)));
6597          }
6598          setFlag_QC(unop(cvt2, unop(cvt, mkexpr(res))), mkexpr(res),
6599                     True, condT);
6600          putDRegI64(dreg, unop(cvt, mkexpr(res)), condT);
6601          return True;
6602       case 10:
6603          /* VSHLL
6604             VMOVL ::= VSHLL #0 */
6605          if (B)
6606             return False;
6607          if (dreg & 1)
6608             return False;
6609          dreg >>= 1;
6610          shift_imm = (8 << size) - shift_imm;
6611          res = newTemp(Ity_V128);
6612          switch (size) {
6613             case 0:
6614                op = Iop_ShlN16x8;
6615                cvt = U ? Iop_Widen8Uto16x8 : Iop_Widen8Sto16x8;
6616                break;
6617             case 1:
6618                op = Iop_ShlN32x4;
6619                cvt = U ? Iop_Widen16Uto32x4 : Iop_Widen16Sto32x4;
6620                break;
6621             case 2:
6622                op = Iop_ShlN64x2;
6623                cvt = U ? Iop_Widen32Uto64x2 : Iop_Widen32Sto64x2;
6624                break;
6625             case 3:
6626                return False;
6627             default:
6628                vassert(0);
6629          }
6630          assign(res, binop(op, unop(cvt, getDRegI64(mreg)), mkU8(shift_imm)));
6631          putQReg(dreg, mkexpr(res), condT);
6632          if (shift_imm == 0) {
6633             DIP("vmovl.%c%d q%u, d%u\n", U ? 'u' : 's', 8 << size,
6634                 dreg, mreg);
6635          } else {
6636             DIP("vshll.%c%d q%u, d%u, #%u\n", U ? 'u' : 's', 8 << size,
6637                 dreg, mreg, shift_imm);
6638          }
6639          return True;
6640       case 14:
6641       case 15:
6642          /* VCVT floating-point <-> fixed-point */
6643          if ((theInstr >> 8) & 1) {
6644             if (U) {
6645                op = Q ? Iop_F32ToFixed32Ux4_RZ : Iop_F32ToFixed32Ux2_RZ;
6646             } else {
6647                op = Q ? Iop_F32ToFixed32Sx4_RZ : Iop_F32ToFixed32Sx2_RZ;
6648             }
6649             DIP("vcvt.%c32.f32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6650                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6651                 64 - ((theInstr >> 16) & 0x3f));
6652          } else {
6653             if (U) {
6654                op = Q ? Iop_Fixed32UToF32x4_RN : Iop_Fixed32UToF32x2_RN;
6655             } else {
6656                op = Q ? Iop_Fixed32SToF32x4_RN : Iop_Fixed32SToF32x2_RN;
6657             }
6658             DIP("vcvt.f32.%c32 %c%u, %c%u, #%u\n", U ? 'u' : 's',
6659                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg,
6660                 64 - ((theInstr >> 16) & 0x3f));
6661          }
6662          if (((theInstr >> 21) & 1) == 0)
6663             return False;
6664          if (Q) {
6665             putQReg(dreg, binop(op, getQReg(mreg),
6666                      mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6667          } else {
6668             putDRegI64(dreg, binop(op, getDRegI64(mreg),
6669                        mkU8(64 - ((theInstr >> 16) & 0x3f))), condT);
6670          }
6671          return True;
6672       default:
6673          return False;
6674
6675    }
6676    return False;
6677 }
6678
6679 /* A7.4.5 Two registers, miscellaneous */
6680 static
6681 Bool dis_neon_data_2reg_misc ( UInt theInstr, IRTemp condT )
6682 {
6683    UInt A = (theInstr >> 16) & 3;
6684    UInt B = (theInstr >> 6) & 0x1f;
6685    UInt Q = (theInstr >> 6) & 1;
6686    UInt U = (theInstr >> 24) & 1;
6687    UInt size = (theInstr >> 18) & 3;
6688    UInt dreg = get_neon_d_regno(theInstr);
6689    UInt mreg = get_neon_m_regno(theInstr);
6690    UInt F = (theInstr >> 10) & 1;
6691    IRTemp arg_d = IRTemp_INVALID;
6692    IRTemp arg_m = IRTemp_INVALID;
6693    IRTemp res = IRTemp_INVALID;
6694    switch (A) {
6695       case 0:
6696          if (Q) {
6697             arg_m = newTemp(Ity_V128);
6698             res = newTemp(Ity_V128);
6699             assign(arg_m, getQReg(mreg));
6700          } else {
6701             arg_m = newTemp(Ity_I64);
6702             res = newTemp(Ity_I64);
6703             assign(arg_m, getDRegI64(mreg));
6704          }
6705          switch (B >> 1) {
6706             case 0: {
6707                /* VREV64 */
6708                IROp op;
6709                switch (size) {
6710                   case 0:
6711                      op = Q ? Iop_Reverse8sIn64_x2 : Iop_Reverse8sIn64_x1;
6712                      break;
6713                   case 1:
6714                      op = Q ? Iop_Reverse16sIn64_x2 : Iop_Reverse16sIn64_x1;
6715                      break;
6716                   case 2:
6717                      op = Q ? Iop_Reverse32sIn64_x2 : Iop_Reverse32sIn64_x1;
6718                      break;
6719                   case 3:
6720                      return False;
6721                   default:
6722                      vassert(0);
6723                }
6724                assign(res, unop(op, mkexpr(arg_m)));
6725                DIP("vrev64.%d %c%u, %c%u\n", 8 << size,
6726                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6727                break;
6728             }
6729             case 1: {
6730                /* VREV32 */
6731                IROp op;
6732                switch (size) {
6733                   case 0:
6734                      op = Q ? Iop_Reverse8sIn32_x4 : Iop_Reverse8sIn32_x2;
6735                      break;
6736                   case 1:
6737                      op = Q ? Iop_Reverse16sIn32_x4 : Iop_Reverse16sIn32_x2;
6738                      break;
6739                   case 2:
6740                   case 3:
6741                      return False;
6742                   default:
6743                      vassert(0);
6744                }
6745                assign(res, unop(op, mkexpr(arg_m)));
6746                DIP("vrev32.%d %c%u, %c%u\n", 8 << size,
6747                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6748                break;
6749             }
6750             case 2: {
6751                /* VREV16 */
6752                IROp op;
6753                switch (size) {
6754                   case 0:
6755                      op = Q ? Iop_Reverse8sIn16_x8 : Iop_Reverse8sIn16_x4;
6756                      break;
6757                   case 1:
6758                   case 2:
6759                   case 3:
6760                      return False;
6761                   default:
6762                      vassert(0);
6763                }
6764                assign(res, unop(op, mkexpr(arg_m)));
6765                DIP("vrev16.%d %c%u, %c%u\n", 8 << size,
6766                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6767                break;
6768             }
6769             case 3:
6770                return False;
6771             case 4:
6772             case 5: {
6773                /* VPADDL */
6774                IROp op;
6775                U = (theInstr >> 7) & 1;
6776                if (Q) {
6777                   switch (size) {
6778                      case 0: op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16; break;
6779                      case 1: op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8; break;
6780                      case 2: op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4; break;
6781                      case 3: return False;
6782                      default: vassert(0);
6783                   }
6784                } else {
6785                   switch (size) {
6786                      case 0: op = U ? Iop_PwAddL8Ux8  : Iop_PwAddL8Sx8;  break;
6787                      case 1: op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4; break;
6788                      case 2: op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2; break;
6789                      case 3: return False;
6790                      default: vassert(0);
6791                   }
6792                }
6793                assign(res, unop(op, mkexpr(arg_m)));
6794                DIP("vpaddl.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6795                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6796                break;
6797             }
6798             case 6:
6799             case 7:
6800                return False;
6801             case 8: {
6802                /* VCLS */
6803                IROp op;
6804                switch (size) {
6805                   case 0: op = Q ? Iop_Cls8x16 : Iop_Cls8x8; break;
6806                   case 1: op = Q ? Iop_Cls16x8 : Iop_Cls16x4; break;
6807                   case 2: op = Q ? Iop_Cls32x4 : Iop_Cls32x2; break;
6808                   case 3: return False;
6809                   default: vassert(0);
6810                }
6811                assign(res, unop(op, mkexpr(arg_m)));
6812                DIP("vcls.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6813                    Q ? 'q' : 'd', mreg);
6814                break;
6815             }
6816             case 9: {
6817                /* VCLZ */
6818                IROp op;
6819                switch (size) {
6820                   case 0: op = Q ? Iop_Clz8x16 : Iop_Clz8x8; break;
6821                   case 1: op = Q ? Iop_Clz16x8 : Iop_Clz16x4; break;
6822                   case 2: op = Q ? Iop_Clz32x4 : Iop_Clz32x2; break;
6823                   case 3: return False;
6824                   default: vassert(0);
6825                }
6826                assign(res, unop(op, mkexpr(arg_m)));
6827                DIP("vclz.i%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6828                    Q ? 'q' : 'd', mreg);
6829                break;
6830             }
6831             case 10:
6832                /* VCNT */
6833                assign(res, unop(Q ? Iop_Cnt8x16 : Iop_Cnt8x8, mkexpr(arg_m)));
6834                DIP("vcnt.8 %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6835                    mreg);
6836                break;
6837             case 11:
6838                /* VMVN */
6839                if (Q)
6840                   assign(res, unop(Iop_NotV128, mkexpr(arg_m)));
6841                else
6842                   assign(res, unop(Iop_Not64, mkexpr(arg_m)));
6843                DIP("vmvn %c%u, %c%u\n", Q ? 'q' : 'd', dreg, Q ? 'q' : 'd',
6844                    mreg);
6845                break;
6846             case 12:
6847             case 13: {
6848                /* VPADAL */
6849                IROp op, add_op;
6850                U = (theInstr >> 7) & 1;
6851                if (Q) {
6852                   switch (size) {
6853                      case 0:
6854                         op = U ? Iop_PwAddL8Ux16 : Iop_PwAddL8Sx16;
6855                         add_op = Iop_Add16x8;
6856                         break;
6857                      case 1:
6858                         op = U ? Iop_PwAddL16Ux8 : Iop_PwAddL16Sx8;
6859                         add_op = Iop_Add32x4;
6860                         break;
6861                      case 2:
6862                         op = U ? Iop_PwAddL32Ux4 : Iop_PwAddL32Sx4;
6863                         add_op = Iop_Add64x2;
6864                         break;
6865                      case 3:
6866                         return False;
6867                      default:
6868                         vassert(0);
6869                   }
6870                } else {
6871                   switch (size) {
6872                      case 0:
6873                         op = U ? Iop_PwAddL8Ux8 : Iop_PwAddL8Sx8;
6874                         add_op = Iop_Add16x4;
6875                         break;
6876                      case 1:
6877                         op = U ? Iop_PwAddL16Ux4 : Iop_PwAddL16Sx4;
6878                         add_op = Iop_Add32x2;
6879                         break;
6880                      case 2:
6881                         op = U ? Iop_PwAddL32Ux2 : Iop_PwAddL32Sx2;
6882                         add_op = Iop_Add64;
6883                         break;
6884                      case 3:
6885                         return False;
6886                      default:
6887                         vassert(0);
6888                   }
6889                }
6890                if (Q) {
6891                   arg_d = newTemp(Ity_V128);
6892                   assign(arg_d, getQReg(dreg));
6893                } else {
6894                   arg_d = newTemp(Ity_I64);
6895                   assign(arg_d, getDRegI64(dreg));
6896                }
6897                assign(res, binop(add_op, unop(op, mkexpr(arg_m)),
6898                                          mkexpr(arg_d)));
6899                DIP("vpadal.%c%d %c%u, %c%u\n", U ? 'u' : 's', 8 << size,
6900                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
6901                break;
6902             }
6903             case 14: {
6904                /* VQABS */
6905                IROp op_sub, op_qsub, op_cmp;
6906                IRTemp mask, tmp;
6907                IRExpr *zero1, *zero2;
6908                IRExpr *neg, *neg2;
6909                if (Q) {
6910                   zero1 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6911                   zero2 = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6912                   mask = newTemp(Ity_V128);
6913                   tmp = newTemp(Ity_V128);
6914                } else {
6915                   zero1 = mkU64(0);
6916                   zero2 = mkU64(0);
6917                   mask = newTemp(Ity_I64);
6918                   tmp = newTemp(Ity_I64);
6919                }
6920                switch (size) {
6921                   case 0:
6922                      op_sub = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6923                      op_qsub = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6924                      op_cmp = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8;
6925                      break;
6926                   case 1:
6927                      op_sub = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6928                      op_qsub = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6929                      op_cmp = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4;
6930                      break;
6931                   case 2:
6932                      op_sub = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6933                      op_qsub = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6934                      op_cmp = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2;
6935                      break;
6936                   case 3:
6937                      return False;
6938                   default:
6939                      vassert(0);
6940                }
6941                assign(mask, binop(op_cmp, mkexpr(arg_m), zero1));
6942                neg = binop(op_qsub, zero2, mkexpr(arg_m));
6943                neg2 = binop(op_sub, zero2, mkexpr(arg_m));
6944                assign(res, binop(Q ? Iop_OrV128 : Iop_Or64,
6945                                  binop(Q ? Iop_AndV128 : Iop_And64,
6946                                        mkexpr(mask),
6947                                        mkexpr(arg_m)),
6948                                  binop(Q ? Iop_AndV128 : Iop_And64,
6949                                        unop(Q ? Iop_NotV128 : Iop_Not64,
6950                                             mkexpr(mask)),
6951                                        neg)));
6952                assign(tmp, binop(Q ? Iop_OrV128 : Iop_Or64,
6953                                  binop(Q ? Iop_AndV128 : Iop_And64,
6954                                        mkexpr(mask),
6955                                        mkexpr(arg_m)),
6956                                  binop(Q ? Iop_AndV128 : Iop_And64,
6957                                        unop(Q ? Iop_NotV128 : Iop_Not64,
6958                                             mkexpr(mask)),
6959                                        neg2)));
6960                setFlag_QC(mkexpr(res), mkexpr(tmp), Q, condT);
6961                DIP("vqabs.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6962                    Q ? 'q' : 'd', mreg);
6963                break;
6964             }
6965             case 15: {
6966                /* VQNEG */
6967                IROp op, op2;
6968                IRExpr *zero;
6969                if (Q) {
6970                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
6971                } else {
6972                   zero = mkU64(0);
6973                }
6974                switch (size) {
6975                   case 0:
6976                      op = Q ? Iop_QSub8Sx16 : Iop_QSub8Sx8;
6977                      op2 = Q ? Iop_Sub8x16 : Iop_Sub8x8;
6978                      break;
6979                   case 1:
6980                      op = Q ? Iop_QSub16Sx8 : Iop_QSub16Sx4;
6981                      op2 = Q ? Iop_Sub16x8 : Iop_Sub16x4;
6982                      break;
6983                   case 2:
6984                      op = Q ? Iop_QSub32Sx4 : Iop_QSub32Sx2;
6985                      op2 = Q ? Iop_Sub32x4 : Iop_Sub32x2;
6986                      break;
6987                   case 3:
6988                      return False;
6989                   default:
6990                      vassert(0);
6991                }
6992                assign(res, binop(op, zero, mkexpr(arg_m)));
6993                setFlag_QC(mkexpr(res), binop(op2, zero, mkexpr(arg_m)),
6994                           Q, condT);
6995                DIP("vqneg.s%d %c%u, %c%u\n", 8 << size, Q ? 'q' : 'd', dreg,
6996                    Q ? 'q' : 'd', mreg);
6997                break;
6998             }
6999             default:
7000                vassert(0);
7001          }
7002          if (Q) {
7003             putQReg(dreg, mkexpr(res), condT);
7004          } else {
7005             putDRegI64(dreg, mkexpr(res), condT);
7006          }
7007          return True;
7008       case 1:
7009          if (Q) {
7010             arg_m = newTemp(Ity_V128);
7011             res = newTemp(Ity_V128);
7012             assign(arg_m, getQReg(mreg));
7013          } else {
7014             arg_m = newTemp(Ity_I64);
7015             res = newTemp(Ity_I64);
7016             assign(arg_m, getDRegI64(mreg));
7017          }
7018          switch ((B >> 1) & 0x7) {
7019             case 0: {
7020                /* VCGT #0 */
7021                IRExpr *zero;
7022                IROp op;
7023                if (Q) {
7024                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7025                } else {
7026                   zero = mkU64(0);
7027                }
7028                if (F) {
7029                   switch (size) {
7030                      case 0: case 1: case 3: return False;
7031                      case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7032                      default: vassert(0);
7033                   }
7034                } else {
7035                   switch (size) {
7036                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7037                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7038                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7039                      case 3: return False;
7040                      default: vassert(0);
7041                   }
7042                }
7043                assign(res, binop(op, mkexpr(arg_m), zero));
7044                DIP("vcgt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7045                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7046                break;
7047             }
7048             case 1: {
7049                /* VCGE #0 */
7050                IROp op;
7051                IRExpr *zero;
7052                if (Q) {
7053                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7054                } else {
7055                   zero = mkU64(0);
7056                }
7057                if (F) {
7058                   switch (size) {
7059                      case 0: case 1: case 3: return False;
7060                      case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7061                      default: vassert(0);
7062                   }
7063                   assign(res, binop(op, mkexpr(arg_m), zero));
7064                } else {
7065                   switch (size) {
7066                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7067                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7068                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7069                      case 3: return False;
7070                      default: vassert(0);
7071                   }
7072                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7073                                    binop(op, zero, mkexpr(arg_m))));
7074                }
7075                DIP("vcge.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7076                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7077                break;
7078             }
7079             case 2: {
7080                /* VCEQ #0 */
7081                IROp op;
7082                IRExpr *zero;
7083                if (F) {
7084                   if (Q) {
7085                      zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7086                   } else {
7087                      zero = mkU64(0);
7088                   }
7089                   switch (size) {
7090                      case 0: case 1: case 3: return False;
7091                      case 2: op = Q ? Iop_CmpEQ32Fx4 : Iop_CmpEQ32Fx2; break;
7092                      default: vassert(0);
7093                   }
7094                   assign(res, binop(op, zero, mkexpr(arg_m)));
7095                } else {
7096                   switch (size) {
7097                      case 0: op = Q ? Iop_CmpNEZ8x16 : Iop_CmpNEZ8x8; break;
7098                      case 1: op = Q ? Iop_CmpNEZ16x8 : Iop_CmpNEZ16x4; break;
7099                      case 2: op = Q ? Iop_CmpNEZ32x4 : Iop_CmpNEZ32x2; break;
7100                      case 3: return False;
7101                      default: vassert(0);
7102                   }
7103                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7104                                    unop(op, mkexpr(arg_m))));
7105                }
7106                DIP("vceq.%c%d %c%u, %c%u, #0\n", F ? 'f' : 'i', 8 << size,
7107                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7108                break;
7109             }
7110             case 3: {
7111                /* VCLE #0 */
7112                IRExpr *zero;
7113                IROp op;
7114                if (Q) {
7115                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7116                } else {
7117                   zero = mkU64(0);
7118                }
7119                if (F) {
7120                   switch (size) {
7121                      case 0: case 1: case 3: return False;
7122                      case 2: op = Q ? Iop_CmpGE32Fx4 : Iop_CmpGE32Fx2; break;
7123                      default: vassert(0);
7124                   }
7125                   assign(res, binop(op, zero, mkexpr(arg_m)));
7126                } else {
7127                   switch (size) {
7128                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7129                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7130                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7131                      case 3: return False;
7132                      default: vassert(0);
7133                   }
7134                   assign(res, unop(Q ? Iop_NotV128 : Iop_Not64,
7135                                    binop(op, mkexpr(arg_m), zero)));
7136                }
7137                DIP("vcle.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7138                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7139                break;
7140             }
7141             case 4: {
7142                /* VCLT #0 */
7143                IROp op;
7144                IRExpr *zero;
7145                if (Q) {
7146                   zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7147                } else {
7148                   zero = mkU64(0);
7149                }
7150                if (F) {
7151                   switch (size) {
7152                      case 0: case 1: case 3: return False;
7153                      case 2: op = Q ? Iop_CmpGT32Fx4 : Iop_CmpGT32Fx2; break;
7154                      default: vassert(0);
7155                   }
7156                   assign(res, binop(op, zero, mkexpr(arg_m)));
7157                } else {
7158                   switch (size) {
7159                      case 0: op = Q ? Iop_CmpGT8Sx16 : Iop_CmpGT8Sx8; break;
7160                      case 1: op = Q ? Iop_CmpGT16Sx8 : Iop_CmpGT16Sx4; break;
7161                      case 2: op = Q ? Iop_CmpGT32Sx4 : Iop_CmpGT32Sx2; break;
7162                      case 3: return False;
7163                      default: vassert(0);
7164                   }
7165                   assign(res, binop(op, zero, mkexpr(arg_m)));
7166                }
7167                DIP("vclt.%c%d %c%u, %c%u, #0\n", F ? 'f' : 's', 8 << size,
7168                    Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7169                break;
7170             }
7171             case 5:
7172                return False;
7173             case 6: {
7174                /* VABS */
7175                if (!F) {
7176                   IROp op;
7177                   switch(size) {
7178                      case 0: op = Q ? Iop_Abs8x16 : Iop_Abs8x8; break;
7179                      case 1: op = Q ? Iop_Abs16x8 : Iop_Abs16x4; break;
7180                      case 2: op = Q ? Iop_Abs32x4 : Iop_Abs32x2; break;
7181                      case 3: return False;
7182                      default: vassert(0);
7183                   }
7184                   assign(res, unop(op, mkexpr(arg_m)));
7185                } else {
7186                   assign(res, unop(Q ? Iop_Abs32Fx4 : Iop_Abs32Fx2,
7187                                    mkexpr(arg_m)));
7188                }
7189                DIP("vabs.%c%d %c%u, %c%u\n",
7190                    F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7191                    Q ? 'q' : 'd', mreg);
7192                break;
7193             }
7194             case 7: {
7195                /* VNEG */
7196                IROp op;
7197                IRExpr *zero;
7198                if (F) {
7199                   switch (size) {
7200                      case 0: case 1: case 3: return False;
7201                      case 2: op = Q ? Iop_Neg32Fx4 : Iop_Neg32Fx2; break;
7202                      default: vassert(0);
7203                   }
7204                   assign(res, unop(op, mkexpr(arg_m)));
7205                } else {
7206                   if (Q) {
7207                      zero = binop(Iop_64HLtoV128, mkU64(0), mkU64(0));
7208                   } else {
7209                      zero = mkU64(0);
7210                   }
7211                   switch (size) {
7212                      case 0: op = Q ? Iop_Sub8x16 : Iop_Sub8x8; break;
7213                      case 1: op = Q ? Iop_Sub16x8 : Iop_Sub16x4; break;
7214                      case 2: op = Q ? Iop_Sub32x4 : Iop_Sub32x2; break;
7215                      case 3: return False;
7216                      default: vassert(0);
7217                   }
7218                   assign(res, binop(op, zero, mkexpr(arg_m)));
7219                }
7220                DIP("vneg.%c%d %c%u, %c%u\n",
7221                    F ? 'f' : 's', 8 << size, Q ? 'q' : 'd', dreg,
7222                    Q ? 'q' : 'd', mreg);
7223                break;
7224             }
7225             default:
7226                vassert(0);
7227          }
7228          if (Q) {
7229             putQReg(dreg, mkexpr(res), condT);
7230          } else {
7231             putDRegI64(dreg, mkexpr(res), condT);
7232          }
7233          return True;
7234       case 2:
7235          if ((B >> 1) == 0) {
7236             /* VSWP */
7237             if (Q) {
7238                arg_m = newTemp(Ity_V128);
7239                assign(arg_m, getQReg(mreg));
7240                putQReg(mreg, getQReg(dreg), condT);
7241                putQReg(dreg, mkexpr(arg_m), condT);
7242             } else {
7243                arg_m = newTemp(Ity_I64);
7244                assign(arg_m, getDRegI64(mreg));
7245                putDRegI64(mreg, getDRegI64(dreg), condT);
7246                putDRegI64(dreg, mkexpr(arg_m), condT);
7247             }
7248             DIP("vswp %c%u, %c%u\n",
7249                 Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7250             return True;
7251          } else if ((B >> 1) == 1) {
7252             /* VTRN */
7253             IROp op_odd = Iop_INVALID, op_even = Iop_INVALID;
7254             IRTemp old_m, old_d, new_d, new_m;
7255             if (Q) {
7256                old_m = newTemp(Ity_V128);
7257                old_d = newTemp(Ity_V128);
7258                new_m = newTemp(Ity_V128);
7259                new_d = newTemp(Ity_V128);
7260                assign(old_m, getQReg(mreg));
7261                assign(old_d, getQReg(dreg));
7262             } else {
7263                old_m = newTemp(Ity_I64);
7264                old_d = newTemp(Ity_I64);
7265                new_m = newTemp(Ity_I64);
7266                new_d = newTemp(Ity_I64);
7267                assign(old_m, getDRegI64(mreg));
7268                assign(old_d, getDRegI64(dreg));
7269             }
7270             if (Q) {
7271                switch (size) {
7272                   case 0:
7273                      op_odd  = Iop_InterleaveOddLanes8x16;
7274                      op_even = Iop_InterleaveEvenLanes8x16;
7275                      break;
7276                   case 1:
7277                      op_odd  = Iop_InterleaveOddLanes16x8;
7278                      op_even = Iop_InterleaveEvenLanes16x8;
7279                      break;
7280                   case 2:
7281                      op_odd  = Iop_InterleaveOddLanes32x4;
7282                      op_even = Iop_InterleaveEvenLanes32x4;
7283                      break;
7284                   case 3:
7285                      return False;
7286                   default:
7287                      vassert(0);
7288                }
7289             } else {
7290                switch (size) {
7291                   case 0:
7292                      op_odd  = Iop_InterleaveOddLanes8x8;
7293                      op_even = Iop_InterleaveEvenLanes8x8;
7294                      break;
7295                   case 1:
7296                      op_odd  = Iop_InterleaveOddLanes16x4;
7297                      op_even = Iop_InterleaveEvenLanes16x4;
7298                      break;
7299                   case 2:
7300                      op_odd  = Iop_InterleaveHI32x2;
7301                      op_even = Iop_InterleaveLO32x2;
7302                      break;
7303                   case 3:
7304                      return False;
7305                   default:
7306                      vassert(0);
7307                }
7308             }
7309             assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7310             assign(new_m, binop(op_odd, mkexpr(old_m), mkexpr(old_d)));
7311             if (Q) {
7312                putQReg(dreg, mkexpr(new_d), condT);
7313                putQReg(mreg, mkexpr(new_m), condT);
7314             } else {
7315                putDRegI64(dreg, mkexpr(new_d), condT);
7316                putDRegI64(mreg, mkexpr(new_m), condT);
7317             }
7318             DIP("vtrn.%d %c%u, %c%u\n",
7319                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7320             return True;
7321          } else if ((B >> 1) == 2) {
7322             /* VUZP */
7323             IROp op_even, op_odd;
7324             IRTemp old_m, old_d, new_m, new_d;
7325             if (!Q && size == 2)
7326                return False;
7327             if (Q) {
7328                old_m = newTemp(Ity_V128);
7329                old_d = newTemp(Ity_V128);
7330                new_m = newTemp(Ity_V128);
7331                new_d = newTemp(Ity_V128);
7332                assign(old_m, getQReg(mreg));
7333                assign(old_d, getQReg(dreg));
7334             } else {
7335                old_m = newTemp(Ity_I64);
7336                old_d = newTemp(Ity_I64);
7337                new_m = newTemp(Ity_I64);
7338                new_d = newTemp(Ity_I64);
7339                assign(old_m, getDRegI64(mreg));
7340                assign(old_d, getDRegI64(dreg));
7341             }
7342             switch (size) {
7343                case 0:
7344                   op_odd  = Q ? Iop_CatOddLanes8x16 : Iop_CatOddLanes8x8;
7345                   op_even = Q ? Iop_CatEvenLanes8x16 : Iop_CatEvenLanes8x8;
7346                   break;
7347                case 1:
7348                   op_odd  = Q ? Iop_CatOddLanes16x8 : Iop_CatOddLanes16x4;
7349                   op_even = Q ? Iop_CatEvenLanes16x8 : Iop_CatEvenLanes16x4;
7350                   break;
7351                case 2:
7352                   op_odd  = Iop_CatOddLanes32x4;
7353                   op_even = Iop_CatEvenLanes32x4;
7354                   break;
7355                case 3:
7356                   return False;
7357                default:
7358                   vassert(0);
7359             }
7360             assign(new_d, binop(op_even, mkexpr(old_m), mkexpr(old_d)));
7361             assign(new_m, binop(op_odd,  mkexpr(old_m), mkexpr(old_d)));
7362             if (Q) {
7363                putQReg(dreg, mkexpr(new_d), condT);
7364                putQReg(mreg, mkexpr(new_m), condT);
7365             } else {
7366                putDRegI64(dreg, mkexpr(new_d), condT);
7367                putDRegI64(mreg, mkexpr(new_m), condT);
7368             }
7369             DIP("vuzp.%d %c%u, %c%u\n",
7370                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7371             return True;
7372          } else if ((B >> 1) == 3) {
7373             /* VZIP */
7374             IROp op_lo, op_hi;
7375             IRTemp old_m, old_d, new_m, new_d;
7376             if (!Q && size == 2)
7377                return False;
7378             if (Q) {
7379                old_m = newTemp(Ity_V128);
7380                old_d = newTemp(Ity_V128);
7381                new_m = newTemp(Ity_V128);
7382                new_d = newTemp(Ity_V128);
7383                assign(old_m, getQReg(mreg));
7384                assign(old_d, getQReg(dreg));
7385             } else {
7386                old_m = newTemp(Ity_I64);
7387                old_d = newTemp(Ity_I64);
7388                new_m = newTemp(Ity_I64);
7389                new_d = newTemp(Ity_I64);
7390                assign(old_m, getDRegI64(mreg));
7391                assign(old_d, getDRegI64(dreg));
7392             }
7393             switch (size) {
7394                case 0:
7395                   op_hi = Q ? Iop_InterleaveHI8x16 : Iop_InterleaveHI8x8;
7396                   op_lo = Q ? Iop_InterleaveLO8x16 : Iop_InterleaveLO8x8;
7397                   break;
7398                case 1:
7399                   op_hi = Q ? Iop_InterleaveHI16x8 : Iop_InterleaveHI16x4;
7400                   op_lo = Q ? Iop_InterleaveLO16x8 : Iop_InterleaveLO16x4;
7401                   break;
7402                case 2:
7403                   op_hi = Iop_InterleaveHI32x4;
7404                   op_lo = Iop_InterleaveLO32x4;
7405                   break;
7406                case 3:
7407                   return False;
7408                default:
7409                   vassert(0);
7410             }
7411             assign(new_d, binop(op_lo, mkexpr(old_m), mkexpr(old_d)));
7412             assign(new_m, binop(op_hi, mkexpr(old_m), mkexpr(old_d)));
7413             if (Q) {
7414                putQReg(dreg, mkexpr(new_d), condT);
7415                putQReg(mreg, mkexpr(new_m), condT);
7416             } else {
7417                putDRegI64(dreg, mkexpr(new_d), condT);
7418                putDRegI64(mreg, mkexpr(new_m), condT);
7419             }
7420             DIP("vzip.%d %c%u, %c%u\n",
7421                 8 << size, Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7422             return True;
7423          } else if (B == 8) {
7424             /* VMOVN */
7425             IROp op;
7426             mreg >>= 1;
7427             switch (size) {
7428                case 0: op = Iop_NarrowUn16to8x8;  break;
7429                case 1: op = Iop_NarrowUn32to16x4; break;
7430                case 2: op = Iop_NarrowUn64to32x2; break;
7431                case 3: return False;
7432                default: vassert(0);
7433             }
7434             putDRegI64(dreg, unop(op, getQReg(mreg)), condT);
7435             DIP("vmovn.i%d d%u, q%u\n", 16 << size, dreg, mreg);
7436             return True;
7437          } else if (B == 9 || (B >> 1) == 5) {
7438             /* VQMOVN, VQMOVUN */
7439             IROp op, op2;
7440             IRTemp tmp;
7441             dreg = ((theInstr >> 18) & 0x10) | ((theInstr >> 12) & 0xF);
7442             mreg = ((theInstr >> 1) & 0x10) | (theInstr & 0xF);
7443             if (mreg & 1)
7444                return False;
7445             mreg >>= 1;
7446             switch (size) {
7447                case 0: op2 = Iop_NarrowUn16to8x8;  break;
7448                case 1: op2 = Iop_NarrowUn32to16x4; break;
7449                case 2: op2 = Iop_NarrowUn64to32x2; break;
7450                case 3: return False;
7451                default: vassert(0);
7452             }
7453             switch (B & 3) {
7454                case 0:
7455                   vassert(0);
7456                case 1:
7457                   switch (size) {
7458                      case 0: op = Iop_QNarrowUn16Sto8Ux8;  break;
7459                      case 1: op = Iop_QNarrowUn32Sto16Ux4; break;
7460                      case 2: op = Iop_QNarrowUn64Sto32Ux2; break;
7461                      case 3: return False;
7462                      default: vassert(0);
7463                   }
7464                   DIP("vqmovun.s%d d%u, q%u\n", 16 << size, dreg, mreg);
7465                   break;
7466                case 2:
7467                   switch (size) {
7468                      case 0: op = Iop_QNarrowUn16Sto8Sx8;  break;
7469                      case 1: op = Iop_QNarrowUn32Sto16Sx4; break;
7470                      case 2: op = Iop_QNarrowUn64Sto32Sx2; break;
7471                      case 3: return False;
7472                      default: vassert(0);
7473                   }
7474                   DIP("vqmovn.s%d d%u, q%u\n", 16 << size, dreg, mreg);
7475                   break;
7476                case 3:
7477                   switch (size) {
7478                      case 0: op = Iop_QNarrowUn16Uto8Ux8;  break;
7479                      case 1: op = Iop_QNarrowUn32Uto16Ux4; break;
7480                      case 2: op = Iop_QNarrowUn64Uto32Ux2; break;
7481                      case 3: return False;
7482                      default: vassert(0);
7483                   }
7484                   DIP("vqmovn.u%d d%u, q%u\n", 16 << size, dreg, mreg);
7485                   break;
7486                default:
7487                   vassert(0);
7488             }
7489             res = newTemp(Ity_I64);
7490             tmp = newTemp(Ity_I64);
7491             assign(res, unop(op, getQReg(mreg)));
7492             assign(tmp, unop(op2, getQReg(mreg)));
7493             setFlag_QC(mkexpr(res), mkexpr(tmp), False, condT);
7494             putDRegI64(dreg, mkexpr(res), condT);
7495             return True;
7496          } else if (B == 12) {
7497             /* VSHLL (maximum shift) */
7498             IROp op, cvt;
7499             UInt shift_imm;
7500             if (Q)
7501                return False;
7502             if (dreg & 1)
7503                return False;
7504             dreg >>= 1;
7505             shift_imm = 8 << size;
7506             res = newTemp(Ity_V128);
7507             switch (size) {
7508                case 0: op = Iop_ShlN16x8; cvt = Iop_Widen8Uto16x8;  break;
7509                case 1: op = Iop_ShlN32x4; cvt = Iop_Widen16Uto32x4; break;
7510                case 2: op = Iop_ShlN64x2; cvt = Iop_Widen32Uto64x2; break;
7511                case 3: return False;
7512                default: vassert(0);
7513             }
7514             assign(res, binop(op, unop(cvt, getDRegI64(mreg)),
7515                                   mkU8(shift_imm)));
7516             putQReg(dreg, mkexpr(res), condT);
7517             DIP("vshll.i%d q%u, d%u, #%d\n", 8 << size, dreg, mreg, 8 << size);
7518             return True;
7519          } else if ((B >> 3) == 3 && (B & 3) == 0) {
7520             /* VCVT (half<->single) */
7521             /* Half-precision extensions are needed to run this */
7522             vassert(0); // ATC
7523             if (((theInstr >> 18) & 3) != 1)
7524                return False;
7525             if ((theInstr >> 8) & 1) {
7526                if (dreg & 1)
7527                   return False;
7528                dreg >>= 1;
7529                putQReg(dreg, unop(Iop_F16toF32x4, getDRegI64(mreg)),
7530                      condT);
7531                DIP("vcvt.f32.f16 q%u, d%u\n", dreg, mreg);
7532             } else {
7533                if (mreg & 1)
7534                   return False;
7535                mreg >>= 1;
7536                putDRegI64(dreg, unop(Iop_F32toF16x4_DEP, getQReg(mreg)),
7537                                 condT);
7538                DIP("vcvt.f16.f32 d%u, q%u\n", dreg, mreg);
7539             }
7540             return True;
7541          } else {
7542             return False;
7543          }
7544          vassert(0);
7545          return True;
7546       case 3:
7547          if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,0)) {
7548             /* VRECPE */
7549             IROp op;
7550             F = (theInstr >> 8) & 1;
7551             if (size != 2)
7552                return False;
7553             if (Q) {
7554                op = F ? Iop_RecipEst32Fx4 : Iop_RecipEst32Ux4;
7555                putQReg(dreg, unop(op, getQReg(mreg)), condT);
7556                DIP("vrecpe.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7557             } else {
7558                op = F ? Iop_RecipEst32Fx2 : Iop_RecipEst32Ux2;
7559                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7560                DIP("vrecpe.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7561             }
7562             return True;
7563          } else if (((B >> 1) & BITS4(1,1,0,1)) == BITS4(1,0,0,1)) {
7564             /* VRSQRTE */
7565             IROp op;
7566             F = (B >> 2) & 1;
7567             if (size != 2)
7568                return False;
7569             if (F) {
7570                /* fp */
7571                op = Q ? Iop_RSqrtEst32Fx4 : Iop_RSqrtEst32Fx2;
7572             } else {
7573                /* unsigned int */
7574                op = Q ? Iop_RSqrtEst32Ux4 : Iop_RSqrtEst32Ux2;
7575             }
7576             if (Q) {
7577                putQReg(dreg, unop(op, getQReg(mreg)), condT);
7578                DIP("vrsqrte.%c32 q%u, q%u\n", F ? 'f' : 'u', dreg, mreg);
7579             } else {
7580                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7581                DIP("vrsqrte.%c32 d%u, d%u\n", F ? 'f' : 'u', dreg, mreg);
7582             }
7583             return True;
7584          } else if ((B >> 3) == 3) {
7585             /* VCVT (fp<->integer) */
7586             IROp op;
7587             if (size != 2)
7588                return False;
7589             switch ((B >> 1) & 3) {
7590                case 0:
7591                   op = Q ? Iop_I32StoF32x4_DEP : Iop_I32StoF32x2_DEP;
7592                   DIP("vcvt.f32.s32 %c%u, %c%u\n",
7593                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7594                   break;
7595                case 1:
7596                   op = Q ? Iop_I32UtoF32x4_DEP : Iop_I32UtoF32x2_DEP;
7597                   DIP("vcvt.f32.u32 %c%u, %c%u\n",
7598                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7599                   break;
7600                case 2:
7601                   op = Q ? Iop_F32toI32Sx4_RZ : Iop_F32toI32Sx2_RZ;
7602                   DIP("vcvt.s32.f32 %c%u, %c%u\n",
7603                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7604                   break;
7605                case 3:
7606                   op = Q ? Iop_F32toI32Ux4_RZ : Iop_F32toI32Ux2_RZ;
7607                   DIP("vcvt.u32.f32 %c%u, %c%u\n",
7608                       Q ? 'q' : 'd', dreg, Q ? 'q' : 'd', mreg);
7609                   break;
7610                default:
7611                   vassert(0);
7612             }
7613             if (Q) {
7614                putQReg(dreg, unop(op, getQReg(mreg)), condT);
7615             } else {
7616                putDRegI64(dreg, unop(op, getDRegI64(mreg)), condT);
7617             }
7618             return True;
7619          } else {
7620             return False;
7621          }
7622          vassert(0);
7623          return True;
7624       default:
7625          vassert(0);
7626    }
7627    return False;
7628 }
7629
7630 /* A7.4.6 One register and a modified immediate value */
7631 static
7632 void ppNeonImm(UInt imm, UInt cmode, UInt op)
7633 {
7634    int i;
7635    switch (cmode) {
7636       case 0: case 1: case 8: case 9:
7637          vex_printf("0x%x", imm);
7638          break;
7639       case 2: case 3: case 10: case 11:
7640          vex_printf("0x%x00", imm);
7641          break;
7642       case 4: case 5:
7643          vex_printf("0x%x0000", imm);
7644          break;
7645       case 6: case 7:
7646          vex_printf("0x%x000000", imm);
7647          break;
7648       case 12:
7649          vex_printf("0x%xff", imm);
7650          break;
7651       case 13:
7652          vex_printf("0x%xffff", imm);
7653          break;
7654       case 14:
7655          if (op) {
7656             vex_printf("0x");
7657             for (i = 7; i >= 0; i--)
7658                vex_printf("%s", (imm & (1 << i)) ? "ff" : "00");
7659          } else {
7660             vex_printf("0x%x", imm);
7661          }
7662          break;
7663       case 15:
7664          vex_printf("0x%x", imm);
7665          break;
7666    }
7667 }
7668
7669 static
7670 const char *ppNeonImmType(UInt cmode, UInt op)
7671 {
7672    switch (cmode) {
7673       case 0 ... 7:
7674       case 12: case 13:
7675          return "i32";
7676       case 8 ... 11:
7677          return "i16";
7678       case 14:
7679          if (op)
7680             return "i64";
7681          else
7682             return "i8";
7683       case 15:
7684          if (op)
7685             vassert(0);
7686          else
7687             return "f32";
7688       default:
7689          vassert(0);
7690    }
7691 }
7692
7693 static
7694 void DIPimm(UInt imm, UInt cmode, UInt op,
7695             const char *instr, UInt Q, UInt dreg)
7696 {
7697    if (vex_traceflags & VEX_TRACE_FE) {
7698       vex_printf("%s.%s %c%u, #", instr,
7699                  ppNeonImmType(cmode, op), Q ? 'q' : 'd', dreg);
7700       ppNeonImm(imm, cmode, op);
7701       vex_printf("\n");
7702    }
7703 }
7704
7705 static
7706 Bool dis_neon_data_1reg_and_imm ( UInt theInstr, IRTemp condT )
7707 {
7708    UInt dreg = get_neon_d_regno(theInstr);
7709    ULong imm_raw = ((theInstr >> 17) & 0x80) | ((theInstr >> 12) & 0x70) |
7710                   (theInstr & 0xf);
7711    ULong imm_raw_pp = imm_raw;
7712    UInt cmode = (theInstr >> 8) & 0xf;
7713    UInt op_bit = (theInstr >> 5) & 1;
7714    ULong imm = 0;
7715    UInt Q = (theInstr >> 6) & 1;
7716    int i, j;
7717    UInt tmp;
7718    IRExpr *imm_val;
7719    IRExpr *expr;
7720    IRTemp tmp_var;
7721    switch(cmode) {
7722       case 7: case 6:
7723          imm_raw = imm_raw << 8;
7724          /* fallthrough */
7725       case 5: case 4:
7726          imm_raw = imm_raw << 8;
7727          /* fallthrough */
7728       case 3: case 2:
7729          imm_raw = imm_raw << 8;
7730          /* fallthrough */
7731       case 0: case 1:
7732          imm = (imm_raw << 32) | imm_raw;
7733          break;
7734       case 11: case 10:
7735          imm_raw = imm_raw << 8;
7736          /* fallthrough */
7737       case 9: case 8:
7738          imm_raw = (imm_raw << 16) | imm_raw;
7739          imm = (imm_raw << 32) | imm_raw;
7740          break;
7741       case 13:
7742          imm_raw = (imm_raw << 8) | 0xff;
7743          /* fallthrough */
7744       case 12:
7745          imm_raw = (imm_raw << 8) | 0xff;
7746          imm = (imm_raw << 32) | imm_raw;
7747          break;
7748       case 14:
7749          if (! op_bit) {
7750             for(i = 0; i < 8; i++) {
7751                imm = (imm << 8) | imm_raw;
7752             }
7753          } else {
7754             for(i = 7; i >= 0; i--) {
7755                tmp = 0;
7756                for(j = 0; j < 8; j++) {
7757                   tmp = (tmp << 1) | ((imm_raw >> i) & 1);
7758                }
7759                imm = (imm << 8) | tmp;
7760             }
7761          }
7762          break;
7763       case 15:
7764          imm = (imm_raw & 0x80) << 5;
7765          imm |= ((~imm_raw & 0x40) << 5);
7766          for(i = 1; i <= 4; i++)
7767             imm |= (imm_raw & 0x40) << i;
7768          imm |= (imm_raw & 0x7f);
7769          imm = imm << 19;
7770          imm = (imm << 32) | imm;
7771          break;
7772       default:
7773          return False;
7774    }
7775    if (Q) {
7776       imm_val = binop(Iop_64HLtoV128, mkU64(imm), mkU64(imm));
7777    } else {
7778       imm_val = mkU64(imm);
7779    }
7780    if (((op_bit == 0) &&
7781       (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 12) == 12))) ||
7782       ((op_bit == 1) && (cmode == 14))) {
7783       /* VMOV (immediate) */
7784       if (Q) {
7785          putQReg(dreg, imm_val, condT);
7786       } else {
7787          putDRegI64(dreg, imm_val, condT);
7788       }
7789       DIPimm(imm_raw_pp, cmode, op_bit, "vmov", Q, dreg);
7790       return True;
7791    }
7792    if ((op_bit == 1) &&
7793       (((cmode & 9) == 0) || ((cmode & 13) == 8) || ((cmode & 14) == 12))) {
7794       /* VMVN (immediate) */
7795       if (Q) {
7796          putQReg(dreg, unop(Iop_NotV128, imm_val), condT);
7797       } else {
7798          putDRegI64(dreg, unop(Iop_Not64, imm_val), condT);
7799       }
7800       DIPimm(imm_raw_pp, cmode, op_bit, "vmvn", Q, dreg);
7801       return True;
7802    }
7803    if (Q) {
7804       tmp_var = newTemp(Ity_V128);
7805       assign(tmp_var, getQReg(dreg));
7806    } else {
7807       tmp_var = newTemp(Ity_I64);
7808       assign(tmp_var, getDRegI64(dreg));
7809    }
7810    if ((op_bit == 0) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7811       /* VORR (immediate) */
7812       if (Q)
7813          expr = binop(Iop_OrV128, mkexpr(tmp_var), imm_val);
7814       else
7815          expr = binop(Iop_Or64, mkexpr(tmp_var), imm_val);
7816       DIPimm(imm_raw_pp, cmode, op_bit, "vorr", Q, dreg);
7817    } else if ((op_bit == 1) && (((cmode & 9) == 1) || ((cmode & 13) == 9))) {
7818       /* VBIC (immediate) */
7819       if (Q)
7820          expr = binop(Iop_AndV128, mkexpr(tmp_var),
7821                                    unop(Iop_NotV128, imm_val));
7822       else
7823          expr = binop(Iop_And64, mkexpr(tmp_var), unop(Iop_Not64, imm_val));
7824       DIPimm(imm_raw_pp, cmode, op_bit, "vbic", Q, dreg);
7825    } else {
7826       return False;
7827    }
7828    if (Q)
7829       putQReg(dreg, expr, condT);
7830    else
7831       putDRegI64(dreg, expr, condT);
7832    return True;
7833 }
7834
7835 /* A7.4 Advanced SIMD data-processing instructions */
7836 static
7837 Bool dis_neon_data_processing ( UInt theInstr, IRTemp condT )
7838 {
7839    UInt A = (theInstr >> 19) & 0x1F;
7840    UInt B = (theInstr >>  8) & 0xF;
7841    UInt C = (theInstr >>  4) & 0xF;
7842    UInt U = (theInstr >> 24) & 0x1;
7843
7844    if (! (A & 0x10)) {
7845       return dis_neon_data_3same(theInstr, condT);
7846    }
7847    if (((A & 0x17) == 0x10) && ((C & 0x9) == 0x1)) {
7848       return dis_neon_data_1reg_and_imm(theInstr, condT);
7849    }
7850    if ((C & 1) == 1) {
7851       return dis_neon_data_2reg_and_shift(theInstr, condT);
7852    }
7853    if (((C & 5) == 0) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7854       return dis_neon_data_3diff(theInstr, condT);
7855    }
7856    if (((C & 5) == 4) && (((A & 0x14) == 0x10) || ((A & 0x16) == 0x14))) {
7857       return dis_neon_data_2reg_and_scalar(theInstr, condT);
7858    }
7859    if ((A & 0x16) == 0x16) {
7860       if ((U == 0) && ((C & 1) == 0)) {
7861          return dis_neon_vext(theInstr, condT);
7862       }
7863       if ((U != 1) || ((C & 1) == 1))
7864          return False;
7865       if ((B & 8) == 0) {
7866          return dis_neon_data_2reg_misc(theInstr, condT);
7867       }
7868       if ((B & 12) == 8) {
7869          return dis_neon_vtb(theInstr, condT);
7870       }
7871       if ((B == 12) && ((C & 9) == 0)) {
7872          return dis_neon_vdup(theInstr, condT);
7873       }
7874    }
7875    return False;
7876 }
7877
7878
7879 /*------------------------------------------------------------*/
7880 /*--- NEON loads and stores                                ---*/
7881 /*------------------------------------------------------------*/
7882
7883 /* For NEON memory operations, we use the standard scheme to handle
7884    conditionalisation: generate a jump around the instruction if the
7885    condition is false.  That's only necessary in Thumb mode, however,
7886    since in ARM mode NEON instructions are unconditional. */
7887
7888 /* A helper function for what follows.  It assumes we already went
7889    uncond as per comments at the top of this section. */
7890 static
7891 void mk_neon_elem_load_to_one_lane( UInt rD, UInt inc, UInt index,
7892                                     UInt N, UInt size, IRTemp addr )
7893 {
7894    UInt i;
7895    switch (size) {
7896       case 0:
7897          putDRegI64(rD, triop(Iop_SetElem8x8, getDRegI64(rD), mkU8(index),
7898                     loadLE(Ity_I8, mkexpr(addr))), IRTemp_INVALID);
7899          break;
7900       case 1:
7901          putDRegI64(rD, triop(Iop_SetElem16x4, getDRegI64(rD), mkU8(index),
7902                     loadLE(Ity_I16, mkexpr(addr))), IRTemp_INVALID);
7903          break;
7904       case 2:
7905          putDRegI64(rD, triop(Iop_SetElem32x2, getDRegI64(rD), mkU8(index),
7906                     loadLE(Ity_I32, mkexpr(addr))), IRTemp_INVALID);
7907          break;
7908       default:
7909          vassert(0);
7910    }
7911    for (i = 1; i <= N; i++) {
7912       switch (size) {
7913          case 0:
7914             putDRegI64(rD + i * inc,
7915                        triop(Iop_SetElem8x8,
7916                              getDRegI64(rD + i * inc),
7917                              mkU8(index),
7918                              loadLE(Ity_I8, binop(Iop_Add32,
7919                                                   mkexpr(addr),
7920                                                   mkU32(i * 1)))),
7921                        IRTemp_INVALID);
7922             break;
7923          case 1:
7924             putDRegI64(rD + i * inc,
7925                        triop(Iop_SetElem16x4,
7926                              getDRegI64(rD + i * inc),
7927                              mkU8(index),
7928                              loadLE(Ity_I16, binop(Iop_Add32,
7929                                                    mkexpr(addr),
7930                                                    mkU32(i * 2)))),
7931                        IRTemp_INVALID);
7932             break;
7933          case 2:
7934             putDRegI64(rD + i * inc,
7935                        triop(Iop_SetElem32x2,
7936                              getDRegI64(rD + i * inc),
7937                              mkU8(index),
7938                              loadLE(Ity_I32, binop(Iop_Add32,
7939                                                    mkexpr(addr),
7940                                                    mkU32(i * 4)))),
7941                        IRTemp_INVALID);
7942             break;
7943          default:
7944             vassert(0);
7945       }
7946    }
7947 }
7948
7949 /* A(nother) helper function for what follows.  It assumes we already
7950    went uncond as per comments at the top of this section. */
7951 static
7952 void mk_neon_elem_store_from_one_lane( UInt rD, UInt inc, UInt index,
7953                                        UInt N, UInt size, IRTemp addr )
7954 {
7955    UInt i;
7956    switch (size) {
7957       case 0:
7958          storeLE(mkexpr(addr),
7959                  binop(Iop_GetElem8x8, getDRegI64(rD), mkU8(index)));
7960          break;
7961       case 1:
7962          storeLE(mkexpr(addr),
7963                  binop(Iop_GetElem16x4, getDRegI64(rD), mkU8(index)));
7964          break;
7965       case 2:
7966          storeLE(mkexpr(addr),
7967                  binop(Iop_GetElem32x2, getDRegI64(rD), mkU8(index)));
7968          break;
7969       default:
7970          vassert(0);
7971    }
7972    for (i = 1; i <= N; i++) {
7973       switch (size) {
7974          case 0:
7975             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 1)),
7976                     binop(Iop_GetElem8x8, getDRegI64(rD + i * inc),
7977                                           mkU8(index)));
7978             break;
7979          case 1:
7980             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 2)),
7981                     binop(Iop_GetElem16x4, getDRegI64(rD + i * inc),
7982                                            mkU8(index)));
7983             break;
7984          case 2:
7985             storeLE(binop(Iop_Add32, mkexpr(addr), mkU32(i * 4)),
7986                     binop(Iop_GetElem32x2, getDRegI64(rD + i * inc),
7987                                            mkU8(index)));
7988             break;
7989          default:
7990             vassert(0);
7991       }
7992    }
7993 }
7994
7995 /* Generate 2x64 -> 2x64 deinterleave code, for VLD2.  Caller must
7996    make *u0 and *u1 be valid IRTemps before the call. */
7997 static void math_DEINTERLEAVE_2 (/*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
7998                                  IRTemp i0, IRTemp i1, Int laneszB)
7999 {
8000    /* The following assumes that the guest is little endian, and hence
8001       that the memory-side (interleaved) data is stored
8002       little-endianly. */
8003    vassert(u0 && u1);
8004    /* This is pretty easy, since we have primitives directly to
8005       hand. */
8006    if (laneszB == 4) {
8007       // memLE(128 bits) == A0 B0 A1 B1
8008       // i0 == B0 A0, i1 == B1 A1
8009       // u0 == A1 A0, u1 == B1 B0
8010       assign(*u0, binop(Iop_InterleaveLO32x2, mkexpr(i1), mkexpr(i0)));
8011       assign(*u1, binop(Iop_InterleaveHI32x2, mkexpr(i1), mkexpr(i0)));
8012    } else if (laneszB == 2) {
8013       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
8014       // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
8015       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
8016       assign(*u0, binop(Iop_CatEvenLanes16x4, mkexpr(i1), mkexpr(i0)));
8017       assign(*u1, binop(Iop_CatOddLanes16x4,  mkexpr(i1), mkexpr(i0)));
8018    } else if (laneszB == 1) {
8019       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
8020       // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
8021       // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
8022       assign(*u0, binop(Iop_CatEvenLanes8x8, mkexpr(i1), mkexpr(i0)));
8023       assign(*u1, binop(Iop_CatOddLanes8x8,  mkexpr(i1), mkexpr(i0)));
8024    } else {
8025       // Can never happen, since VLD2 only has valid lane widths of 32,
8026       // 16 or 8 bits.
8027       vpanic("math_DEINTERLEAVE_2");
8028    }
8029 }
8030
8031 /* Generate 2x64 -> 2x64 interleave code, for VST2.  Caller must make
8032    *u0 and *u1 be valid IRTemps before the call. */
8033 static void math_INTERLEAVE_2 (/*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8034                                IRTemp u0, IRTemp u1, Int laneszB)
8035 {
8036    /* The following assumes that the guest is little endian, and hence
8037       that the memory-side (interleaved) data is stored
8038       little-endianly. */
8039    vassert(i0 && i1);
8040    /* This is pretty easy, since we have primitives directly to
8041       hand. */
8042    if (laneszB == 4) {
8043       // memLE(128 bits) == A0 B0 A1 B1
8044       // i0 == B0 A0, i1 == B1 A1
8045       // u0 == A1 A0, u1 == B1 B0
8046       assign(*i0, binop(Iop_InterleaveLO32x2, mkexpr(u1), mkexpr(u0)));
8047       assign(*i1, binop(Iop_InterleaveHI32x2, mkexpr(u1), mkexpr(u0)));
8048    } else if (laneszB == 2) {
8049       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3
8050       // i0 == B1 A1 B0 A0, i1 == B3 A3 B2 A2
8051       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0
8052       assign(*i0, binop(Iop_InterleaveLO16x4, mkexpr(u1), mkexpr(u0)));
8053       assign(*i1, binop(Iop_InterleaveHI16x4, mkexpr(u1), mkexpr(u0)));
8054    } else if (laneszB == 1) {
8055       // memLE(128 bits) == A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
8056       // i0 == B3 A3 B2 A2 B1 A1 B0 A0, i1 == B7 A7 B6 A6 B5 A5 B4 A4
8057       // u0 == A7 A6 A5 A4 A3 A2 A1 A0, u1 == B7 B6 B5 B4 B3 B2 B1 B0
8058       assign(*i0, binop(Iop_InterleaveLO8x8, mkexpr(u1), mkexpr(u0)));
8059       assign(*i1, binop(Iop_InterleaveHI8x8, mkexpr(u1), mkexpr(u0)));
8060    } else {
8061       // Can never happen, since VST2 only has valid lane widths of 32,
8062       // 16 or 8 bits.
8063       vpanic("math_INTERLEAVE_2");
8064    }
8065 }
8066
8067 // Helper function for generating arbitrary slicing 'n' dicing of
8068 // 3 8x8 vectors, as needed for VLD3.8 and VST3.8.
8069 static IRExpr* math_PERM_8x8x3(const UChar* desc,
8070                                IRTemp s0, IRTemp s1, IRTemp s2)
8071 {
8072    // desc is an array of 8 pairs, encoded as 16 bytes,
8073    // that describe how to assemble the result lanes, starting with
8074    // lane 7.  Each pair is: first component (0..2) says which of
8075    // s0/s1/s2 to use.  Second component (0..7) is the lane number
8076    // in the source to use.
8077    UInt si;
8078    for (si = 0; si < 7; si++) {
8079       vassert(desc[2 * si + 0] <= 2);
8080       vassert(desc[2 * si + 1] <= 7);
8081    }
8082    IRTemp h3 = newTemp(Ity_I64);
8083    IRTemp h2 = newTemp(Ity_I64);
8084    IRTemp h1 = newTemp(Ity_I64);
8085    IRTemp h0 = newTemp(Ity_I64);
8086    IRTemp srcs[3] = {s0, s1, s2};
8087 #  define SRC_VEC(_lane)   mkexpr(srcs[desc[2 * (7-(_lane)) + 0]])
8088 #  define SRC_SHIFT(_lane) mkU8(56-8*(desc[2 * (7-(_lane)) + 1]))
8089    assign(h3, binop(Iop_InterleaveHI8x8,
8090                     binop(Iop_Shl64, SRC_VEC(7), SRC_SHIFT(7)),
8091                     binop(Iop_Shl64, SRC_VEC(6), SRC_SHIFT(6))));
8092    assign(h2, binop(Iop_InterleaveHI8x8,
8093                     binop(Iop_Shl64, SRC_VEC(5), SRC_SHIFT(5)),
8094                     binop(Iop_Shl64, SRC_VEC(4), SRC_SHIFT(4))));
8095    assign(h1, binop(Iop_InterleaveHI8x8,
8096                     binop(Iop_Shl64, SRC_VEC(3), SRC_SHIFT(3)),
8097                     binop(Iop_Shl64, SRC_VEC(2), SRC_SHIFT(2))));
8098    assign(h0, binop(Iop_InterleaveHI8x8,
8099                     binop(Iop_Shl64, SRC_VEC(1), SRC_SHIFT(1)),
8100                     binop(Iop_Shl64, SRC_VEC(0), SRC_SHIFT(0))));
8101 #  undef SRC_VEC
8102 #  undef SRC_SHIFT
8103    // Now h3..h0 are 64 bit vectors with useful information only
8104    // in the top 16 bits.  We now concatentate those four 16-bit
8105    // groups so as to produce the final result.
8106    IRTemp w1 = newTemp(Ity_I64);
8107    IRTemp w0 = newTemp(Ity_I64);
8108    assign(w1, binop(Iop_InterleaveHI16x4, mkexpr(h3), mkexpr(h2)));
8109    assign(w0, binop(Iop_InterleaveHI16x4, mkexpr(h1), mkexpr(h0)));
8110    return binop(Iop_InterleaveHI32x2, mkexpr(w1), mkexpr(w0));
8111 }
8112
8113 /* Generate 3x64 -> 3x64 deinterleave code, for VLD3.  Caller must
8114    make *u0, *u1 and *u2 be valid IRTemps before the call. */
8115 static void math_DEINTERLEAVE_3 (
8116                /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1, /*OUT*/IRTemp* u2,
8117                IRTemp i0, IRTemp i1, IRTemp i2, Int laneszB
8118             )
8119 {
8120 #  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8121 #  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8122 #  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8123    /* The following assumes that the guest is little endian, and hence
8124       that the memory-side (interleaved) data is stored
8125       little-endianly. */
8126    vassert(u0 && u1 && u2);
8127    if (laneszB == 4) {
8128       // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8129       // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8130       // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8131       assign(*u0, IHI32x2(SHL64(i1,  0), SHL64(i0, 32)));
8132       assign(*u1, IHI32x2(SHL64(i2, 32), SHL64(i0,  0)));
8133       assign(*u2, IHI32x2(SHL64(i2,  0), SHL64(i1, 32)));
8134    } else if (laneszB == 2) {
8135       // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8136       // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8137       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
8138 #     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8139                 IHI32x2(                                      \
8140                    IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
8141                            SHL64((_tmp2),48-16*(_la2))),      \
8142                    IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
8143                            SHL64((_tmp0),48-16*(_la0))))
8144       assign(*u0, XXX(i2,1, i1,2, i0,3, i0,0));
8145       assign(*u1, XXX(i2,2, i1,3, i1,0, i0,1));
8146       assign(*u2, XXX(i2,3, i2,0, i1,1, i0,2));
8147 #     undef XXX
8148    } else if (laneszB == 1) {
8149       // These describe how the result vectors [7..0] are
8150       // assembled from the source vectors.  Each pair is
8151       // (source vector number, lane number).
8152       static const UChar de0[16] = {2,5, 2,2, 1,7, 1,4, 1,1, 0,6, 0,3, 0,0};
8153       static const UChar de1[16] = {2,6, 2,3, 2,0, 1,5, 1,2, 0,7, 0,4, 0,1};
8154       static const UChar de2[16] = {2,7, 2,4, 2,1, 1,6, 1,3, 1,0, 0,5, 0,2};
8155       assign(*u0, math_PERM_8x8x3(de0, i0, i1, i2));
8156       assign(*u1, math_PERM_8x8x3(de1, i0, i1, i2));
8157       assign(*u2, math_PERM_8x8x3(de2, i0, i1, i2));
8158    } else {
8159       // Can never happen, since VLD3 only has valid lane widths of 32,
8160       // 16 or 8 bits.
8161       vpanic("math_DEINTERLEAVE_3");
8162    }
8163 #  undef SHL64
8164 #  undef IHI16x4
8165 #  undef IHI32x2
8166 }
8167
8168 /* Generate 3x64 -> 3x64 interleave code, for VST3.  Caller must
8169    make *i0, *i1 and *i2 be valid IRTemps before the call. */
8170 static void math_INTERLEAVE_3 (
8171                /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1, /*OUT*/IRTemp* i2,
8172                IRTemp u0, IRTemp u1, IRTemp u2, Int laneszB
8173             )
8174 {
8175 #  define IHI32x2(_e1, _e2) binop(Iop_InterleaveHI32x2, (_e1), (_e2))
8176 #  define IHI16x4(_e1, _e2) binop(Iop_InterleaveHI16x4, (_e1), (_e2))
8177 #  define SHL64(_tmp, _amt) binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8178    /* The following assumes that the guest is little endian, and hence
8179       that the memory-side (interleaved) data is stored
8180       little-endianly. */
8181    vassert(i0 && i1 && i2);
8182    if (laneszB == 4) {
8183       // memLE(192 bits) == A0 B0 C0 A1 B1 C1
8184       // i0 == B0 A0, i1 == A1 C0, i2 == C1 B1
8185       // u0 == A1 A0, u1 == B1 B0, u2 == C1 C0
8186       assign(*i0, IHI32x2(SHL64(u1, 32), SHL64(u0, 32)));
8187       assign(*i1, IHI32x2(SHL64(u0,  0), SHL64(u2, 32)));
8188       assign(*i2, IHI32x2(SHL64(u2,  0), SHL64(u1,  0)));
8189    } else if (laneszB == 2) {
8190       // memLE(192 bits) == A0 B0 C0 A1, B1 C1 A2 B2, C2 A3 B3 C3
8191       // i0 == A1 C0 B0 A0, i1 == B2 A2 C1 B1, i2 == C3 B3 A3 C2
8192       // u0 == A3 A2 A1 A0, u1 == B3 B2 B1 B0, u2 == C3 C2 C1 C0
8193 #     define XXX(_tmp3,_la3,_tmp2,_la2,_tmp1,_la1,_tmp0,_la0) \
8194                 IHI32x2(                                      \
8195                    IHI16x4(SHL64((_tmp3),48-16*(_la3)),       \
8196                            SHL64((_tmp2),48-16*(_la2))),      \
8197                    IHI16x4(SHL64((_tmp1),48-16*(_la1)),       \
8198                            SHL64((_tmp0),48-16*(_la0))))
8199       assign(*i0, XXX(u0,1, u2,0, u1,0, u0,0));
8200       assign(*i1, XXX(u1,2, u0,2, u2,1, u1,1));
8201       assign(*i2, XXX(u2,3, u1,3, u0,3, u2,2));
8202 #     undef XXX
8203    } else if (laneszB == 1) {
8204       // These describe how the result vectors [7..0] are
8205       // assembled from the source vectors.  Each pair is
8206       // (source vector number, lane number).
8207       static const UChar in0[16] = {1,2, 0,2, 2,1, 1,1, 0,1, 2,0, 1,0, 0,0};
8208       static const UChar in1[16] = {0,5, 2,4, 1,4, 0,4, 2,3, 1,3, 0,3, 2,2};
8209       static const UChar in2[16] = {2,7, 1,7, 0,7, 2,6, 1,6, 0,6, 2,5, 1,5};
8210       assign(*i0, math_PERM_8x8x3(in0, u0, u1, u2));
8211       assign(*i1, math_PERM_8x8x3(in1, u0, u1, u2));
8212       assign(*i2, math_PERM_8x8x3(in2, u0, u1, u2));
8213    } else {
8214       // Can never happen, since VST3 only has valid lane widths of 32,
8215       // 16 or 8 bits.
8216       vpanic("math_INTERLEAVE_3");
8217    }
8218 #  undef SHL64
8219 #  undef IHI16x4
8220 #  undef IHI32x2
8221 }
8222
8223 /* Generate 4x64 -> 4x64 deinterleave code, for VLD4.  Caller must
8224    make *u0, *u1, *u2 and *u3 be valid IRTemps before the call. */
8225 static void math_DEINTERLEAVE_4 (
8226                /*OUT*/IRTemp* u0, /*OUT*/IRTemp* u1,
8227                /*OUT*/IRTemp* u2, /*OUT*/IRTemp* u3,
8228                IRTemp i0, IRTemp i1, IRTemp i2, IRTemp i3, Int laneszB
8229             )
8230 {
8231 #  define IHI32x2(_t1, _t2) \
8232              binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8233 #  define ILO32x2(_t1, _t2) \
8234              binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8235 #  define IHI16x4(_t1, _t2) \
8236              binop(Iop_InterleaveHI16x4, mkexpr(_t1), mkexpr(_t2))
8237 #  define ILO16x4(_t1, _t2) \
8238              binop(Iop_InterleaveLO16x4, mkexpr(_t1), mkexpr(_t2))
8239 #  define IHI8x8(_t1, _e2) \
8240              binop(Iop_InterleaveHI8x8, mkexpr(_t1), _e2)
8241 #  define SHL64(_tmp, _amt) \
8242              binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8243    /* The following assumes that the guest is little endian, and hence
8244       that the memory-side (interleaved) data is stored
8245       little-endianly. */
8246    vassert(u0 && u1 && u2 && u3);
8247    if (laneszB == 4) {
8248       assign(*u0, ILO32x2(i2, i0));
8249       assign(*u1, IHI32x2(i2, i0));
8250       assign(*u2, ILO32x2(i3, i1));
8251       assign(*u3, IHI32x2(i3, i1));
8252    } else if (laneszB == 2) {
8253       IRTemp b1b0a1a0 = newTemp(Ity_I64);
8254       IRTemp b3b2a3a2 = newTemp(Ity_I64);
8255       IRTemp d1d0c1c0 = newTemp(Ity_I64);
8256       IRTemp d3d2c3c2 = newTemp(Ity_I64);
8257       assign(b1b0a1a0, ILO16x4(i1, i0));
8258       assign(b3b2a3a2, ILO16x4(i3, i2));
8259       assign(d1d0c1c0, IHI16x4(i1, i0));
8260       assign(d3d2c3c2, IHI16x4(i3, i2));
8261       // And now do what we did for the 32-bit case.
8262       assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8263       assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8264       assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8265       assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8266    } else if (laneszB == 1) {
8267       // Deinterleave into 16-bit chunks, then do as the 16-bit case.
8268       IRTemp i0x = newTemp(Ity_I64);
8269       IRTemp i1x = newTemp(Ity_I64);
8270       IRTemp i2x = newTemp(Ity_I64);
8271       IRTemp i3x = newTemp(Ity_I64);
8272       assign(i0x, IHI8x8(i0, SHL64(i0, 32)));
8273       assign(i1x, IHI8x8(i1, SHL64(i1, 32)));
8274       assign(i2x, IHI8x8(i2, SHL64(i2, 32)));
8275       assign(i3x, IHI8x8(i3, SHL64(i3, 32)));
8276       // From here on is like the 16 bit case.
8277       IRTemp b1b0a1a0 = newTemp(Ity_I64);
8278       IRTemp b3b2a3a2 = newTemp(Ity_I64);
8279       IRTemp d1d0c1c0 = newTemp(Ity_I64);
8280       IRTemp d3d2c3c2 = newTemp(Ity_I64);
8281       assign(b1b0a1a0, ILO16x4(i1x, i0x));
8282       assign(b3b2a3a2, ILO16x4(i3x, i2x));
8283       assign(d1d0c1c0, IHI16x4(i1x, i0x));
8284       assign(d3d2c3c2, IHI16x4(i3x, i2x));
8285       // And now do what we did for the 32-bit case.
8286       assign(*u0, ILO32x2(b3b2a3a2, b1b0a1a0));
8287       assign(*u1, IHI32x2(b3b2a3a2, b1b0a1a0));
8288       assign(*u2, ILO32x2(d3d2c3c2, d1d0c1c0));
8289       assign(*u3, IHI32x2(d3d2c3c2, d1d0c1c0));
8290    } else {
8291       // Can never happen, since VLD4 only has valid lane widths of 32,
8292       // 16 or 8 bits.
8293       vpanic("math_DEINTERLEAVE_4");
8294    }
8295 #  undef SHL64
8296 #  undef IHI8x8
8297 #  undef ILO16x4
8298 #  undef IHI16x4
8299 #  undef ILO32x2
8300 #  undef IHI32x2
8301 }
8302
8303 /* Generate 4x64 -> 4x64 interleave code, for VST4.  Caller must
8304    make *i0, *i1, *i2 and *i3 be valid IRTemps before the call. */
8305 static void math_INTERLEAVE_4 (
8306                /*OUT*/IRTemp* i0, /*OUT*/IRTemp* i1,
8307                /*OUT*/IRTemp* i2, /*OUT*/IRTemp* i3,
8308                IRTemp u0, IRTemp u1, IRTemp u2, IRTemp u3, Int laneszB
8309             )
8310 {
8311 #  define IHI32x2(_t1, _t2) \
8312              binop(Iop_InterleaveHI32x2, mkexpr(_t1), mkexpr(_t2))
8313 #  define ILO32x2(_t1, _t2) \
8314              binop(Iop_InterleaveLO32x2, mkexpr(_t1), mkexpr(_t2))
8315 #  define CEV16x4(_t1, _t2) \
8316              binop(Iop_CatEvenLanes16x4, mkexpr(_t1), mkexpr(_t2))
8317 #  define COD16x4(_t1, _t2) \
8318              binop(Iop_CatOddLanes16x4, mkexpr(_t1), mkexpr(_t2))
8319 #  define COD8x8(_t1, _e2) \
8320              binop(Iop_CatOddLanes8x8, mkexpr(_t1), _e2)
8321 #  define SHL64(_tmp, _amt) \
8322              binop(Iop_Shl64, mkexpr(_tmp), mkU8(_amt))
8323    /* The following assumes that the guest is little endian, and hence
8324       that the memory-side (interleaved) data is stored
8325       little-endianly. */
8326    vassert(u0 && u1 && u2 && u3);
8327    if (laneszB == 4) {
8328       assign(*i0, ILO32x2(u1, u0));
8329       assign(*i1, ILO32x2(u3, u2));
8330       assign(*i2, IHI32x2(u1, u0));
8331       assign(*i3, IHI32x2(u3, u2));
8332    } else if (laneszB == 2) {
8333       // First, interleave at the 32-bit lane size.
8334       IRTemp b1b0a1a0 = newTemp(Ity_I64);
8335       IRTemp b3b2a3a2 = newTemp(Ity_I64);
8336       IRTemp d1d0c1c0 = newTemp(Ity_I64);
8337       IRTemp d3d2c3c2 = newTemp(Ity_I64);
8338       assign(b1b0a1a0, ILO32x2(u1, u0));
8339       assign(b3b2a3a2, IHI32x2(u1, u0));
8340       assign(d1d0c1c0, ILO32x2(u3, u2));
8341       assign(d3d2c3c2, IHI32x2(u3, u2));
8342       // And interleave (cat) at the 16 bit size.
8343       assign(*i0, CEV16x4(d1d0c1c0, b1b0a1a0));
8344       assign(*i1, COD16x4(d1d0c1c0, b1b0a1a0));
8345       assign(*i2, CEV16x4(d3d2c3c2, b3b2a3a2));
8346       assign(*i3, COD16x4(d3d2c3c2, b3b2a3a2));
8347    } else if (laneszB == 1) {
8348       // First, interleave at the 32-bit lane size.
8349       IRTemp b1b0a1a0 = newTemp(Ity_I64);
8350       IRTemp b3b2a3a2 = newTemp(Ity_I64);
8351       IRTemp d1d0c1c0 = newTemp(Ity_I64);
8352       IRTemp d3d2c3c2 = newTemp(Ity_I64);
8353       assign(b1b0a1a0, ILO32x2(u1, u0));
8354       assign(b3b2a3a2, IHI32x2(u1, u0));
8355       assign(d1d0c1c0, ILO32x2(u3, u2));
8356       assign(d3d2c3c2, IHI32x2(u3, u2));
8357       // And interleave (cat) at the 16 bit size.
8358       IRTemp i0x = newTemp(Ity_I64);
8359       IRTemp i1x = newTemp(Ity_I64);
8360       IRTemp i2x = newTemp(Ity_I64);
8361       IRTemp i3x = newTemp(Ity_I64);
8362       assign(i0x, CEV16x4(d1d0c1c0, b1b0a1a0));
8363       assign(i1x, COD16x4(d1d0c1c0, b1b0a1a0));
8364       assign(i2x, CEV16x4(d3d2c3c2, b3b2a3a2));
8365       assign(i3x, COD16x4(d3d2c3c2, b3b2a3a2));
8366       // And rearrange within each word, to get the right 8 bit lanes.
8367       assign(*i0, COD8x8(i0x, SHL64(i0x, 8)));
8368       assign(*i1, COD8x8(i1x, SHL64(i1x, 8)));
8369       assign(*i2, COD8x8(i2x, SHL64(i2x, 8)));
8370       assign(*i3, COD8x8(i3x, SHL64(i3x, 8)));
8371    } else {
8372       // Can never happen, since VLD4 only has valid lane widths of 32,
8373       // 16 or 8 bits.
8374       vpanic("math_DEINTERLEAVE_4");
8375    }
8376 #  undef SHL64
8377 #  undef COD8x8
8378 #  undef COD16x4
8379 #  undef CEV16x4
8380 #  undef ILO32x2
8381 #  undef IHI32x2
8382 }
8383
8384 /* A7.7 Advanced SIMD element or structure load/store instructions */
8385 static
8386 Bool dis_neon_load_or_store ( UInt theInstr,
8387                               Bool isT, IRTemp condT )
8388 {
8389 #  define INSN(_bMax,_bMin)  SLICE_UInt(theInstr, (_bMax), (_bMin))
8390    UInt bA = INSN(23,23);
8391    UInt fB = INSN(11,8);
8392    UInt bL = INSN(21,21);
8393    UInt rD = (INSN(22,22) << 4) | INSN(15,12);
8394    UInt rN = INSN(19,16);
8395    UInt rM = INSN(3,0);
8396    UInt N, size, i, j;
8397    UInt inc;
8398    UInt regs = 1;
8399
8400    if (isT) {
8401       vassert(condT != IRTemp_INVALID);
8402    } else {
8403       vassert(condT == IRTemp_INVALID);
8404    }
8405    /* So now, if condT is not IRTemp_INVALID, we know we're
8406       dealing with Thumb code. */
8407
8408    if (INSN(20,20) != 0)
8409       return False;
8410
8411    IRTemp initialRn = newTemp(Ity_I32);
8412    assign(initialRn, isT ? getIRegT(rN) : getIRegA(rN));
8413
8414    IRTemp initialRm = newTemp(Ity_I32);
8415    assign(initialRm, isT ? getIRegT(rM) : getIRegA(rM));
8416
8417    /* There are 3 cases:
8418       (1) VSTn / VLDn (n-element structure from/to one lane)
8419       (2) VLDn (single element to all lanes)
8420       (3) VSTn / VLDn (multiple n-element structures)
8421    */
8422    if (bA) {
8423       N = fB & 3;
8424       if ((fB >> 2) < 3) {
8425          /* ------------ Case (1) ------------
8426             VSTn / VLDn (n-element structure from/to one lane) */
8427
8428          size = fB >> 2;
8429
8430          switch (size) {
8431             case 0: i = INSN(7,5); inc = 1; break;
8432             case 1: i = INSN(7,6); inc = INSN(5,5) ? 2 : 1; break;
8433             case 2: i = INSN(7,7); inc = INSN(6,6) ? 2 : 1; break;
8434             case 3: return False;
8435             default: vassert(0);
8436          }
8437
8438          IRTemp addr = newTemp(Ity_I32);
8439          assign(addr, mkexpr(initialRn));
8440
8441          // go uncond
8442          if (condT != IRTemp_INVALID)
8443             mk_skip_over_T32_if_cond_is_false(condT);
8444          // now uncond
8445
8446          if (bL)
8447             mk_neon_elem_load_to_one_lane(rD, inc, i, N, size, addr);
8448          else
8449             mk_neon_elem_store_from_one_lane(rD, inc, i, N, size, addr);
8450          DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << size);
8451          for (j = 0; j <= N; j++) {
8452             if (j)
8453                DIP(", ");
8454             DIP("d%u[%u]", rD + j * inc, i);
8455          }
8456          DIP("}, [r%u]", rN);
8457          if (rM != 13 && rM != 15) {
8458             DIP(", r%u\n", rM);
8459          } else {
8460             DIP("%s\n", (rM != 15) ? "!" : "");
8461          }
8462       } else {
8463          /* ------------ Case (2) ------------
8464             VLDn (single element to all lanes) */
8465          UInt r;
8466          if (bL == 0)
8467             return False;
8468
8469          inc = INSN(5,5) + 1;
8470          size = INSN(7,6);
8471
8472          /* size == 3 and size == 2 cases differ in alignment constraints */
8473          if (size == 3 && N == 3 && INSN(4,4) == 1)
8474             size = 2;
8475
8476          if (size == 0 && N == 0 && INSN(4,4) == 1)
8477             return False;
8478          if (N == 2 && INSN(4,4) == 1)
8479             return False;
8480          if (size == 3)
8481             return False;
8482
8483          // go uncond
8484          if (condT != IRTemp_INVALID)
8485             mk_skip_over_T32_if_cond_is_false(condT);
8486          // now uncond
8487
8488          IRTemp addr = newTemp(Ity_I32);
8489          assign(addr, mkexpr(initialRn));
8490
8491          if (N == 0 && INSN(5,5))
8492             regs = 2;
8493
8494          for (r = 0; r < regs; r++) {
8495             switch (size) {
8496                case 0:
8497                   putDRegI64(rD + r, unop(Iop_Dup8x8,
8498                                           loadLE(Ity_I8, mkexpr(addr))),
8499                              IRTemp_INVALID);
8500                   break;
8501                case 1:
8502                   putDRegI64(rD + r, unop(Iop_Dup16x4,
8503                                           loadLE(Ity_I16, mkexpr(addr))),
8504                              IRTemp_INVALID);
8505                   break;
8506                case 2:
8507                   putDRegI64(rD + r, unop(Iop_Dup32x2,
8508                                           loadLE(Ity_I32, mkexpr(addr))),
8509                              IRTemp_INVALID);
8510                   break;
8511                default:
8512                   vassert(0);
8513             }
8514             for (i = 1; i <= N; i++) {
8515                switch (size) {
8516                   case 0:
8517                      putDRegI64(rD + r + i * inc,
8518                                 unop(Iop_Dup8x8,
8519                                      loadLE(Ity_I8, binop(Iop_Add32,
8520                                                           mkexpr(addr),
8521                                                           mkU32(i * 1)))),
8522                                 IRTemp_INVALID);
8523                      break;
8524                   case 1:
8525                      putDRegI64(rD + r + i * inc,
8526                                 unop(Iop_Dup16x4,
8527                                      loadLE(Ity_I16, binop(Iop_Add32,
8528                                                            mkexpr(addr),
8529                                                            mkU32(i * 2)))),
8530                                 IRTemp_INVALID);
8531                      break;
8532                   case 2:
8533                      putDRegI64(rD + r + i * inc,
8534                                 unop(Iop_Dup32x2,
8535                                      loadLE(Ity_I32, binop(Iop_Add32,
8536                                                            mkexpr(addr),
8537                                                            mkU32(i * 4)))),
8538                                 IRTemp_INVALID);
8539                      break;
8540                   default:
8541                      vassert(0);
8542                }
8543             }
8544          }
8545          DIP("vld%u.%d {", N + 1, 8 << size);
8546          for (r = 0; r < regs; r++) {
8547             for (i = 0; i <= N; i++) {
8548                if (i || r)
8549                   DIP(", ");
8550                DIP("d%u[]", rD + r + i * inc);
8551             }
8552          }
8553          DIP("}, [r%u]", rN);
8554          if (rM != 13 && rM != 15) {
8555             DIP(", r%u\n", rM);
8556          } else {
8557             DIP("%s\n", (rM != 15) ? "!" : "");
8558          }
8559       }
8560       /* Writeback.  We're uncond here, so no condT-ing. */
8561       if (rM != 15) {
8562          if (rM == 13) {
8563             IRExpr* e = binop(Iop_Add32,
8564                               mkexpr(initialRn),
8565                               mkU32((1 << size) * (N + 1)));
8566             if (isT)
8567                putIRegT(rN, e, IRTemp_INVALID);
8568             else
8569                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8570          } else {
8571             IRExpr* e = binop(Iop_Add32,
8572                               mkexpr(initialRn),
8573                               mkexpr(initialRm));
8574             if (isT)
8575                putIRegT(rN, e, IRTemp_INVALID);
8576             else
8577                putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8578          }
8579       }
8580       return True;
8581    } else {
8582       /* ------------ Case (3) ------------
8583          VSTn / VLDn (multiple n-element structures) */
8584       inc = (fB & 1) + 1;
8585
8586       if (fB == BITS4(0,0,1,0)       // Dd, Dd+1, Dd+2, Dd+3  inc = 1  regs = 4
8587           || fB == BITS4(0,1,1,0)    // Dd, Dd+1, Dd+2        inc = 1  regs = 3
8588           || fB == BITS4(0,1,1,1)    // Dd                    inc = 2  regs = 1
8589           || fB == BITS4(1,0,1,0)) { // Dd, Dd+1              inc = 1  regs = 2
8590          N = 0; // VLD1/VST1.  'inc' does not appear to have any
8591                 // meaning for the VLD1/VST1 cases.  'regs' is the number of
8592                 // registers involved.
8593          if (rD + regs > 32) return False;
8594       }
8595       else
8596       if (fB == BITS4(0,0,1,1)       // Dd, Dd+1, Dd+2, Dd+3  inc=2  regs = 2
8597           || fB == BITS4(1,0,0,0)    // Dd, Dd+1              inc=1  regs = 1
8598           || fB == BITS4(1,0,0,1)) { // Dd, Dd+2              inc=2  regs = 1
8599          N = 1; // VLD2/VST2.  'regs' is the number of register-pairs involved
8600          if (regs == 1 && inc == 1 && rD + 1 >= 32) return False;
8601          if (regs == 1 && inc == 2 && rD + 2 >= 32) return False;
8602          if (regs == 2 && inc == 2 && rD + 3 >= 32) return False;
8603       } else if (fB == BITS4(0,1,0,0) || fB == BITS4(0,1,0,1)) {
8604          N = 2; // VLD3/VST3
8605          if (inc == 1 && rD + 2 >= 32) return False;
8606          if (inc == 2 && rD + 4 >= 32) return False;
8607       } else if (fB == BITS4(0,0,0,0) || fB == BITS4(0,0,0,1)) {
8608          N = 3; // VLD4/VST4
8609          if (inc == 1 && rD + 3 >= 32) return False;
8610          if (inc == 2 && rD + 6 >= 32) return False;
8611       } else {
8612          return False;
8613       }
8614
8615       if (N == 1 && fB == BITS4(0,0,1,1)) {
8616          regs = 2;
8617       } else if (N == 0) {
8618          if (fB == BITS4(1,0,1,0)) {
8619             regs = 2;
8620          } else if (fB == BITS4(0,1,1,0)) {
8621             regs = 3;
8622          } else if (fB == BITS4(0,0,1,0)) {
8623             regs = 4;
8624          }
8625       }
8626
8627       size = INSN(7,6);
8628       if (N == 0 && size == 3)
8629          size = 2;
8630       if (size == 3)
8631          return False;
8632
8633       // go uncond
8634       if (condT != IRTemp_INVALID)
8635          mk_skip_over_T32_if_cond_is_false(condT);
8636       // now uncond
8637
8638       IRTemp addr = newTemp(Ity_I32);
8639       assign(addr, mkexpr(initialRn));
8640
8641       if (N == 0 /* No interleaving -- VLD1/VST1 */) {
8642          UInt r;
8643          vassert(regs == 1 || regs == 2 || regs == 3 || regs == 4);
8644          /* inc has no relevance here */
8645          for (r = 0; r < regs; r++) {
8646             if (bL)
8647                putDRegI64(rD+r, loadLE(Ity_I64, mkexpr(addr)), IRTemp_INVALID);
8648             else
8649                storeLE(mkexpr(addr), getDRegI64(rD+r));
8650             IRTemp tmp = newTemp(Ity_I32);
8651             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(8)));
8652             addr = tmp;
8653          }
8654       }
8655       else
8656       if (N == 1 /* 2-interleaving -- VLD2/VST2 */) {
8657          vassert( (regs == 1 && (inc == 1 || inc == 2))
8658                    || (regs == 2 && inc == 2) );
8659          // Make 'nregs' be the number of registers and 'regstep'
8660          // equal the actual register-step.  The ARM encoding, using 'regs'
8661          // and 'inc', is bizarre.  After this, we have:
8662          // Dd, Dd+1              regs = 1, inc = 1,   nregs = 2, regstep = 1
8663          // Dd, Dd+2              regs = 1, inc = 2,   nregs = 2, regstep = 2
8664          // Dd, Dd+1, Dd+2, Dd+3  regs = 2, inc = 2,   nregs = 4, regstep = 1
8665          UInt nregs   = 2;
8666          UInt regstep = 1;
8667          if (regs == 1 && inc == 1) {
8668             /* nothing */
8669          } else if (regs == 1 && inc == 2) {
8670             regstep = 2;
8671          } else if (regs == 2 && inc == 2) {
8672             nregs = 4;
8673          } else {
8674             vassert(0);
8675          }
8676          // 'a' is address,
8677          // 'di' is interleaved data, 'du' is uninterleaved data
8678          if (nregs == 2) {
8679             IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8680             IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8681             IRTemp  di0 = newTemp(Ity_I64);
8682             IRTemp  di1 = newTemp(Ity_I64);
8683             IRTemp  du0 = newTemp(Ity_I64);
8684             IRTemp  du1 = newTemp(Ity_I64);
8685             if (bL) {
8686                assign(di0, loadLE(Ity_I64, a0));
8687                assign(di1, loadLE(Ity_I64, a1));
8688                math_DEINTERLEAVE_2(&du0, &du1, di0, di1, 1 << size);
8689                putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8690                putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8691             } else {
8692                assign(du0, getDRegI64(rD + 0 * regstep));
8693                assign(du1, getDRegI64(rD + 1 * regstep));
8694                math_INTERLEAVE_2(&di0, &di1, du0, du1, 1 << size);
8695                storeLE(a0, mkexpr(di0));
8696                storeLE(a1, mkexpr(di1));
8697             }
8698             IRTemp tmp = newTemp(Ity_I32);
8699             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(16)));
8700             addr = tmp;
8701          } else {
8702             vassert(nregs == 4);
8703             vassert(regstep == 1);
8704             IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8705             IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8706             IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8707             IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8708             IRTemp  di0 = newTemp(Ity_I64);
8709             IRTemp  di1 = newTemp(Ity_I64);
8710             IRTemp  di2 = newTemp(Ity_I64);
8711             IRTemp  di3 = newTemp(Ity_I64);
8712             IRTemp  du0 = newTemp(Ity_I64);
8713             IRTemp  du1 = newTemp(Ity_I64);
8714             IRTemp  du2 = newTemp(Ity_I64);
8715             IRTemp  du3 = newTemp(Ity_I64);
8716             if (bL) {
8717                assign(di0, loadLE(Ity_I64, a0));
8718                assign(di1, loadLE(Ity_I64, a1));
8719                assign(di2, loadLE(Ity_I64, a2));
8720                assign(di3, loadLE(Ity_I64, a3));
8721                // Note spooky interleaving: du0, du2, di0, di1 etc
8722                math_DEINTERLEAVE_2(&du0, &du2, di0, di1, 1 << size);
8723                math_DEINTERLEAVE_2(&du1, &du3, di2, di3, 1 << size);
8724                putDRegI64(rD + 0 * regstep, mkexpr(du0), IRTemp_INVALID);
8725                putDRegI64(rD + 1 * regstep, mkexpr(du1), IRTemp_INVALID);
8726                putDRegI64(rD + 2 * regstep, mkexpr(du2), IRTemp_INVALID);
8727                putDRegI64(rD + 3 * regstep, mkexpr(du3), IRTemp_INVALID);
8728             } else {
8729                assign(du0, getDRegI64(rD + 0 * regstep));
8730                assign(du1, getDRegI64(rD + 1 * regstep));
8731                assign(du2, getDRegI64(rD + 2 * regstep));
8732                assign(du3, getDRegI64(rD + 3 * regstep));
8733                // Note spooky interleaving: du0, du2, di0, di1 etc
8734                math_INTERLEAVE_2(&di0, &di1, du0, du2, 1 << size);
8735                math_INTERLEAVE_2(&di2, &di3, du1, du3, 1 << size);
8736                storeLE(a0, mkexpr(di0));
8737                storeLE(a1, mkexpr(di1));
8738                storeLE(a2, mkexpr(di2));
8739                storeLE(a3, mkexpr(di3));
8740             }
8741
8742             IRTemp tmp = newTemp(Ity_I32);
8743             assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8744             addr = tmp;
8745          }
8746       }
8747       else
8748       if (N == 2 /* 3-interleaving -- VLD3/VST3 */) {
8749          // Dd, Dd+1, Dd+2   regs = 1, inc = 1
8750          // Dd, Dd+2, Dd+4   regs = 1, inc = 2
8751          vassert(regs == 1 && (inc == 1 || inc == 2));
8752          IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8753          IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8754          IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8755          IRTemp  di0 = newTemp(Ity_I64);
8756          IRTemp  di1 = newTemp(Ity_I64);
8757          IRTemp  di2 = newTemp(Ity_I64);
8758          IRTemp  du0 = newTemp(Ity_I64);
8759          IRTemp  du1 = newTemp(Ity_I64);
8760          IRTemp  du2 = newTemp(Ity_I64);
8761          if (bL) {
8762             assign(di0, loadLE(Ity_I64, a0));
8763             assign(di1, loadLE(Ity_I64, a1));
8764             assign(di2, loadLE(Ity_I64, a2));
8765             math_DEINTERLEAVE_3(&du0, &du1, &du2, di0, di1, di2, 1 << size);
8766             putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8767             putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8768             putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8769          } else {
8770             assign(du0, getDRegI64(rD + 0 * inc));
8771             assign(du1, getDRegI64(rD + 1 * inc));
8772             assign(du2, getDRegI64(rD + 2 * inc));
8773             math_INTERLEAVE_3(&di0, &di1, &di2, du0, du1, du2, 1 << size);
8774             storeLE(a0, mkexpr(di0));
8775             storeLE(a1, mkexpr(di1));
8776             storeLE(a2, mkexpr(di2));
8777          }
8778          IRTemp tmp = newTemp(Ity_I32);
8779          assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(24)));
8780          addr = tmp;
8781       }
8782       else
8783       if (N == 3 /* 4-interleaving -- VLD4/VST4 */) {
8784          // Dd, Dd+1, Dd+2, Dd+3   regs = 1, inc = 1
8785          // Dd, Dd+2, Dd+4, Dd+6   regs = 1, inc = 2
8786          vassert(regs == 1 && (inc == 1 || inc == 2));
8787          IRExpr* a0  = binop(Iop_Add32, mkexpr(addr), mkU32(0));
8788          IRExpr* a1  = binop(Iop_Add32, mkexpr(addr), mkU32(8));
8789          IRExpr* a2  = binop(Iop_Add32, mkexpr(addr), mkU32(16));
8790          IRExpr* a3  = binop(Iop_Add32, mkexpr(addr), mkU32(24));
8791          IRTemp  di0 = newTemp(Ity_I64);
8792          IRTemp  di1 = newTemp(Ity_I64);
8793          IRTemp  di2 = newTemp(Ity_I64);
8794          IRTemp  di3 = newTemp(Ity_I64);
8795          IRTemp  du0 = newTemp(Ity_I64);
8796          IRTemp  du1 = newTemp(Ity_I64);
8797          IRTemp  du2 = newTemp(Ity_I64);
8798          IRTemp  du3 = newTemp(Ity_I64);
8799          if (bL) {
8800             assign(di0, loadLE(Ity_I64, a0));
8801             assign(di1, loadLE(Ity_I64, a1));
8802             assign(di2, loadLE(Ity_I64, a2));
8803             assign(di3, loadLE(Ity_I64, a3));
8804             math_DEINTERLEAVE_4(&du0, &du1, &du2, &du3,
8805                                 di0, di1, di2, di3, 1 << size);
8806             putDRegI64(rD + 0 * inc, mkexpr(du0), IRTemp_INVALID);
8807             putDRegI64(rD + 1 * inc, mkexpr(du1), IRTemp_INVALID);
8808             putDRegI64(rD + 2 * inc, mkexpr(du2), IRTemp_INVALID);
8809             putDRegI64(rD + 3 * inc, mkexpr(du3), IRTemp_INVALID);
8810          } else {
8811             assign(du0, getDRegI64(rD + 0 * inc));
8812             assign(du1, getDRegI64(rD + 1 * inc));
8813             assign(du2, getDRegI64(rD + 2 * inc));
8814             assign(du3, getDRegI64(rD + 3 * inc));
8815             math_INTERLEAVE_4(&di0, &di1, &di2, &di3,
8816                               du0, du1, du2, du3, 1 << size);
8817             storeLE(a0, mkexpr(di0));
8818             storeLE(a1, mkexpr(di1));
8819             storeLE(a2, mkexpr(di2));
8820             storeLE(a3, mkexpr(di3));
8821          }
8822          IRTemp tmp = newTemp(Ity_I32);
8823          assign(tmp, binop(Iop_Add32, mkexpr(addr), mkU32(32)));
8824          addr = tmp;
8825       }
8826       else {
8827          vassert(0);
8828       }
8829
8830       /* Writeback */
8831       if (rM != 15) {
8832          IRExpr* e;
8833          if (rM == 13) {
8834             e = binop(Iop_Add32, mkexpr(initialRn),
8835                                  mkU32(8 * (N + 1) * regs));
8836          } else {
8837             e = binop(Iop_Add32, mkexpr(initialRn),
8838                                  mkexpr(initialRm));
8839          }
8840          if (isT)
8841             putIRegT(rN, e, IRTemp_INVALID);
8842          else
8843             putIRegA(rN, e, IRTemp_INVALID, Ijk_Boring);
8844       }
8845
8846       DIP("v%s%u.%d {", bL ? "ld" : "st", N + 1, 8 << INSN(7,6));
8847       if ((inc == 1 && regs * (N + 1) > 1)
8848           || (inc == 2 && regs > 1 && N > 0)) {
8849          DIP("d%u-d%u", rD, rD + regs * (N + 1) - 1);
8850       } else {
8851          UInt r;
8852          for (r = 0; r < regs; r++) {
8853             for (i = 0; i <= N; i++) {
8854                if (i || r)
8855                   DIP(", ");
8856                DIP("d%u", rD + r + i * inc);
8857             }
8858          }
8859       }
8860       DIP("}, [r%u]", rN);
8861       if (rM != 13 && rM != 15) {
8862          DIP(", r%u\n", rM);
8863       } else {
8864          DIP("%s\n", (rM != 15) ? "!" : "");
8865       }
8866       return True;
8867    }
8868 #  undef INSN
8869 }
8870
8871
8872 /*------------------------------------------------------------*/
8873 /*--- NEON, top level control                              ---*/
8874 /*------------------------------------------------------------*/
8875
8876 /* Both ARM and Thumb */
8877
8878 /* Translate a NEON instruction.    If successful, returns
8879    True and *dres may or may not be updated.  If failure, returns
8880    False and doesn't change *dres nor create any IR.
8881
8882    The Thumb and ARM encodings are similar for the 24 bottom bits, but
8883    the top 8 bits are slightly different.  In both cases, the caller
8884    must pass the entire 32 bits.  Callers may pass any instruction;
8885    this ignores non-NEON ones.
8886
8887    Caller must supply an IRTemp 'condT' holding the gating condition,
8888    or IRTemp_INVALID indicating the insn is always executed.  In ARM
8889    code, this must always be IRTemp_INVALID because NEON insns are
8890    unconditional for ARM.
8891
8892    Finally, the caller must indicate whether this occurs in ARM or in
8893    Thumb code.
8894
8895    This only handles NEON for ARMv7 and below.  The NEON extensions
8896    for v8 are handled by decode_V8_instruction.
8897 */
8898 static Bool decode_NEON_instruction_ARMv7_and_below (
8899                /*MOD*/DisResult* dres,
8900                UInt              insn32,
8901                IRTemp            condT,
8902                Bool              isT
8903             )
8904 {
8905 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn32, (_bMax), (_bMin))
8906
8907    /* There are two kinds of instruction to deal with: load/store and
8908       data processing.  In each case, in ARM mode we merely identify
8909       the kind, and pass it on to the relevant sub-handler.  In Thumb
8910       mode we identify the kind, swizzle the bits around to make it
8911       have the same encoding as in ARM, and hand it on to the
8912       sub-handler.
8913    */
8914
8915    /* In ARM mode, NEON instructions can't be conditional. */
8916    if (!isT)
8917       vassert(condT == IRTemp_INVALID);
8918
8919    /* Data processing:
8920       Thumb: 111U 1111 AAAA Axxx xxxx BBBB CCCC xxxx
8921       ARM:   1111 001U AAAA Axxx xxxx BBBB CCCC xxxx
8922    */
8923    if (!isT && INSN(31,25) == BITS7(1,1,1,1,0,0,1)) {
8924       // ARM, DP
8925       return dis_neon_data_processing(INSN(31,0), condT);
8926    }
8927    if (isT && INSN(31,29) == BITS3(1,1,1)
8928        && INSN(27,24) == BITS4(1,1,1,1)) {
8929       // Thumb, DP
8930       UInt reformatted = INSN(23,0);
8931       reformatted |= (((UInt)INSN(28,28)) << 24); // U bit
8932       reformatted |= (((UInt)BITS7(1,1,1,1,0,0,1)) << 25);
8933       return dis_neon_data_processing(reformatted, condT);
8934    }
8935
8936    /* Load/store:
8937       Thumb: 1111 1001 AxL0 xxxx xxxx BBBB xxxx xxxx
8938       ARM:   1111 0100 AxL0 xxxx xxxx BBBB xxxx xxxx
8939    */
8940    if (!isT && INSN(31,24) == BITS8(1,1,1,1,0,1,0,0)) {
8941       // ARM, memory
8942       return dis_neon_load_or_store(INSN(31,0), isT, condT);
8943    }
8944    if (isT && INSN(31,24) == BITS8(1,1,1,1,1,0,0,1)) {
8945       UInt reformatted = INSN(23,0);
8946       reformatted |= (((UInt)BITS8(1,1,1,1,0,1,0,0)) << 24);
8947       return dis_neon_load_or_store(reformatted, isT, condT);
8948    }
8949
8950    /* Doesn't match. */
8951    return False;
8952
8953 #  undef INSN
8954 }
8955
8956
8957 /*------------------------------------------------------------*/
8958 /*--- V6 MEDIA instructions                                ---*/
8959 /*------------------------------------------------------------*/
8960
8961 /* Both ARM and Thumb */
8962
8963 /* Translate a V6 media instruction.    If successful, returns
8964    True and *dres may or may not be updated.  If failure, returns
8965    False and doesn't change *dres nor create any IR.
8966
8967    The Thumb and ARM encodings are completely different.  In Thumb
8968    mode, the caller must pass the entire 32 bits.  In ARM mode it must
8969    pass the lower 28 bits.  Apart from that, callers may pass any
8970    instruction; this function ignores anything it doesn't recognise.
8971
8972    Caller must supply an IRTemp 'condT' holding the gating condition,
8973    or IRTemp_INVALID indicating the insn is always executed.
8974
8975    Caller must also supply an ARMCondcode 'conq'.  This is only used
8976    for debug printing, no other purpose.  For ARM, this is simply the
8977    top 4 bits of the original instruction.  For Thumb, the condition
8978    is not (really) known until run time, and so ARMCondAL should be
8979    passed, only so that printing of these instructions does not show
8980    any condition.
8981
8982    Finally, the caller must indicate whether this occurs in ARM or in
8983    Thumb code.
8984 */
8985 static Bool decode_V6MEDIA_instruction (
8986                /*MOD*/DisResult* dres,
8987                UInt              insnv6m,
8988                IRTemp            condT,
8989                ARMCondcode       conq,
8990                Bool              isT
8991             )
8992 {
8993 #  define INSNA(_bMax,_bMin)   SLICE_UInt(insnv6m, (_bMax), (_bMin))
8994 #  define INSNT0(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 16) & 0xFFFF), \
8995                                            (_bMax), (_bMin) )
8996 #  define INSNT1(_bMax,_bMin)  SLICE_UInt( ((insnv6m >> 0)  & 0xFFFF), \
8997                                            (_bMax), (_bMin) )
8998    HChar dis_buf[128];
8999    dis_buf[0] = 0;
9000
9001    if (isT) {
9002       vassert(conq == ARMCondAL);
9003    } else {
9004       vassert(INSNA(31,28) == BITS4(0,0,0,0)); // caller's obligation
9005       vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
9006    }
9007
9008    /* ----------- smulbb, smulbt, smultb, smultt ----------- */
9009    {
9010      UInt regD = 99, regM = 99, regN = 99, bitM = 0, bitN = 0;
9011      Bool gate = False;
9012
9013      if (isT) {
9014         if (INSNT0(15,4) == 0xFB1 && INSNT1(15,12) == BITS4(1,1,1,1)
9015             && INSNT1(7,6) == BITS2(0,0)) {
9016            regD = INSNT1(11,8);
9017            regM = INSNT1(3,0);
9018            regN = INSNT0(3,0);
9019            bitM = INSNT1(4,4);
9020            bitN = INSNT1(5,5);
9021            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9022               gate = True;
9023         }
9024      } else {
9025         if (BITS8(0,0,0,1,0,1,1,0) == INSNA(27,20) &&
9026             BITS4(0,0,0,0)         == INSNA(15,12) &&
9027             BITS4(1,0,0,0)         == (INSNA(7,4) & BITS4(1,0,0,1)) ) {
9028            regD = INSNA(19,16);
9029            regM = INSNA(11,8);
9030            regN = INSNA(3,0);
9031            bitM = INSNA(6,6);
9032            bitN = INSNA(5,5);
9033            if (regD != 15 && regN != 15 && regM != 15)
9034               gate = True;
9035         }
9036      }
9037
9038      if (gate) {
9039         IRTemp srcN = newTemp(Ity_I32);
9040         IRTemp srcM = newTemp(Ity_I32);
9041         IRTemp res  = newTemp(Ity_I32);
9042
9043         assign( srcN, binop(Iop_Sar32,
9044                             binop(Iop_Shl32,
9045                                   isT ? getIRegT(regN) : getIRegA(regN),
9046                                   mkU8(bitN ? 0 : 16)), mkU8(16)) );
9047         assign( srcM, binop(Iop_Sar32,
9048                             binop(Iop_Shl32,
9049                                   isT ? getIRegT(regM) : getIRegA(regM),
9050                                   mkU8(bitM ? 0 : 16)), mkU8(16)) );
9051         assign( res, binop(Iop_Mul32, mkexpr(srcN), mkexpr(srcM)) );
9052
9053         if (isT)
9054            putIRegT( regD, mkexpr(res), condT );
9055         else
9056            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9057
9058         DIP( "smul%c%c%s r%u, r%u, r%u\n", bitN ? 't' : 'b', bitM ? 't' : 'b',
9059              nCC(conq), regD, regN, regM );
9060         return True;
9061      }
9062      /* fall through */
9063    }
9064
9065    /* ------------ smulwb<y><c> <Rd>,<Rn>,<Rm> ------------- */
9066    /* ------------ smulwt<y><c> <Rd>,<Rn>,<Rm> ------------- */
9067    {
9068      UInt regD = 99, regN = 99, regM = 99, bitM = 0;
9069      Bool gate = False;
9070
9071      if (isT) {
9072         if (INSNT0(15,4) == 0xFB3 && INSNT1(15,12) == BITS4(1,1,1,1)
9073             && INSNT1(7,5) == BITS3(0,0,0)) {
9074           regN = INSNT0(3,0);
9075           regD = INSNT1(11,8);
9076           regM = INSNT1(3,0);
9077           bitM = INSNT1(4,4);
9078           if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9079              gate = True;
9080         }
9081      } else {
9082         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
9083             INSNA(15,12) == BITS4(0,0,0,0)         &&
9084             (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,1,0)) {
9085            regD = INSNA(19,16);
9086            regN = INSNA(3,0);
9087            regM = INSNA(11,8);
9088            bitM = INSNA(6,6);
9089            if (regD != 15 && regN != 15 && regM != 15)
9090               gate = True;
9091         }
9092      }
9093
9094      if (gate) {
9095         IRTemp irt_prod = newTemp(Ity_I64);
9096
9097         assign( irt_prod,
9098                 binop(Iop_MullS32,
9099                       isT ? getIRegT(regN) : getIRegA(regN),
9100                       binop(Iop_Sar32,
9101                             binop(Iop_Shl32,
9102                                   isT ? getIRegT(regM) : getIRegA(regM),
9103                                   mkU8(bitM ? 0 : 16)),
9104                             mkU8(16))) );
9105
9106         IRExpr* ire_result = binop(Iop_Or32,
9107                                    binop( Iop_Shl32,
9108                                           unop(Iop_64HIto32, mkexpr(irt_prod)),
9109                                           mkU8(16) ),
9110                                    binop( Iop_Shr32,
9111                                           unop(Iop_64to32, mkexpr(irt_prod)),
9112                                           mkU8(16) ) );
9113
9114         if (isT)
9115            putIRegT( regD, ire_result, condT );
9116         else
9117            putIRegA( regD, ire_result, condT, Ijk_Boring );
9118
9119         DIP("smulw%c%s r%u, r%u, r%u\n",
9120             bitM ? 't' : 'b', nCC(conq),regD,regN,regM);
9121         return True;
9122      }
9123      /* fall through */
9124    }
9125
9126    /* ------------ pkhbt<c> Rd, Rn, Rm {,LSL #imm} ------------- */
9127    /* ------------ pkhtb<c> Rd, Rn, Rm {,ASR #imm} ------------- */
9128    {
9129      UInt regD = 99, regN = 99, regM = 99, imm5 = 99, shift_type = 99;
9130      Bool tbform = False;
9131      Bool gate = False;
9132
9133      if (isT) {
9134         if (INSNT0(15,4) == 0xEAC
9135             && INSNT1(15,15) == 0 && INSNT1(4,4) == 0) {
9136            regN = INSNT0(3,0);
9137            regD = INSNT1(11,8);
9138            regM = INSNT1(3,0);
9139            imm5 = (INSNT1(14,12) << 2) | INSNT1(7,6);
9140            shift_type = (INSNT1(5,5) << 1) | 0;
9141            tbform = (INSNT1(5,5) == 0) ? False : True;
9142            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9143               gate = True;
9144         }
9145      } else {
9146         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
9147             INSNA(5,4)   == BITS2(0,1) /*          &&
9148             (INSNA(6,6)  == 0 || INSNA(6,6) == 1)
9149             This last bit with INSNA(6,6) is correct, but gcc 8 complains
9150             (correctly) that it is always true.  So I commented it out
9151             to keep gcc quiet. */ ) {
9152            regD = INSNA(15,12);
9153            regN = INSNA(19,16);
9154            regM = INSNA(3,0);
9155            imm5 = INSNA(11,7);
9156            shift_type = (INSNA(6,6) << 1) | 0;
9157            tbform = (INSNA(6,6) == 0) ? False : True;
9158            if (regD != 15 && regN != 15 && regM != 15)
9159               gate = True;
9160         }
9161      }
9162
9163      if (gate) {
9164         IRTemp irt_regM       = newTemp(Ity_I32);
9165         IRTemp irt_regM_shift = newTemp(Ity_I32);
9166         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
9167         compute_result_and_C_after_shift_by_imm5(
9168            dis_buf, &irt_regM_shift, NULL, irt_regM, shift_type, imm5, regM );
9169
9170         UInt mask = (tbform == True) ? 0x0000FFFF : 0xFFFF0000;
9171         IRExpr* ire_result
9172           = binop( Iop_Or32,
9173                    binop(Iop_And32, mkexpr(irt_regM_shift), mkU32(mask)),
9174                    binop(Iop_And32, isT ? getIRegT(regN) : getIRegA(regN),
9175                                     unop(Iop_Not32, mkU32(mask))) );
9176
9177         if (isT)
9178            putIRegT( regD, ire_result, condT );
9179         else
9180            putIRegA( regD, ire_result, condT, Ijk_Boring );
9181
9182         DIP( "pkh%s%s r%u, r%u, r%u %s\n", tbform ? "tb" : "bt",
9183              nCC(conq), regD, regN, regM, dis_buf );
9184
9185         return True;
9186      }
9187      /* fall through */
9188    }
9189
9190    /* ---------- usat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9191    {
9192      UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9193      Bool gate = False;
9194
9195      if (isT) {
9196         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,1,0)
9197             && INSNT0(4,4) == 0
9198             && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9199            regD       = INSNT1(11,8);
9200            regN       = INSNT0(3,0);
9201            shift_type = (INSNT0(5,5) << 1) | 0;
9202            imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
9203            sat_imm    = INSNT1(4,0);
9204            if (!isBadRegT(regD) && !isBadRegT(regN))
9205               gate = True;
9206            if (shift_type == BITS2(1,0) && imm5 == 0)
9207               gate = False;
9208         }
9209      } else {
9210         if (INSNA(27,21) == BITS7(0,1,1,0,1,1,1) &&
9211             INSNA(5,4)   == BITS2(0,1)) {
9212            regD       = INSNA(15,12);
9213            regN       = INSNA(3,0);
9214            shift_type = (INSNA(6,6) << 1) | 0;
9215            imm5       = INSNA(11,7);
9216            sat_imm    = INSNA(20,16);
9217            if (regD != 15 && regN != 15)
9218               gate = True;
9219         }
9220      }
9221
9222      if (gate) {
9223         IRTemp irt_regN       = newTemp(Ity_I32);
9224         IRTemp irt_regN_shift = newTemp(Ity_I32);
9225         IRTemp irt_sat_Q      = newTemp(Ity_I32);
9226         IRTemp irt_result     = newTemp(Ity_I32);
9227
9228         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9229         compute_result_and_C_after_shift_by_imm5(
9230                 dis_buf, &irt_regN_shift, NULL,
9231                 irt_regN, shift_type, imm5, regN );
9232
9233         armUnsignedSatQ( &irt_result, &irt_sat_Q, irt_regN_shift, sat_imm );
9234         or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9235
9236         if (isT)
9237            putIRegT( regD, mkexpr(irt_result), condT );
9238         else
9239            putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9240
9241         DIP("usat%s r%u, #0x%04x, %s\n",
9242             nCC(conq), regD, imm5, dis_buf);
9243         return True;
9244      }
9245      /* fall through */
9246    }
9247
9248   /* ----------- ssat<c> <Rd>,#<imm5>,<Rn>{,<shift>} ----------- */
9249    {
9250      UInt regD = 99, regN = 99, shift_type = 99, imm5 = 99, sat_imm = 99;
9251      Bool gate = False;
9252
9253      if (isT) {
9254         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9255             && INSNT0(4,4) == 0
9256             && INSNT1(15,15) == 0 && INSNT1(5,5) == 0) {
9257            regD       = INSNT1(11,8);
9258            regN       = INSNT0(3,0);
9259            shift_type = (INSNT0(5,5) << 1) | 0;
9260            imm5       = (INSNT1(14,12) << 2) | INSNT1(7,6);
9261            sat_imm    = INSNT1(4,0) + 1;
9262            if (!isBadRegT(regD) && !isBadRegT(regN))
9263               gate = True;
9264            if (shift_type == BITS2(1,0) && imm5 == 0)
9265               gate = False;
9266         }
9267      } else {
9268         if (INSNA(27,21) == BITS7(0,1,1,0,1,0,1) &&
9269             INSNA(5,4)   == BITS2(0,1)) {
9270            regD       = INSNA(15,12);
9271            regN       = INSNA(3,0);
9272            shift_type = (INSNA(6,6) << 1) | 0;
9273            imm5       = INSNA(11,7);
9274            sat_imm    = INSNA(20,16) + 1;
9275            if (regD != 15 && regN != 15)
9276               gate = True;
9277         }
9278      }
9279
9280      if (gate) {
9281         IRTemp irt_regN       = newTemp(Ity_I32);
9282         IRTemp irt_regN_shift = newTemp(Ity_I32);
9283         IRTemp irt_sat_Q      = newTemp(Ity_I32);
9284         IRTemp irt_result     = newTemp(Ity_I32);
9285
9286         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9287         compute_result_and_C_after_shift_by_imm5(
9288                 dis_buf, &irt_regN_shift, NULL,
9289                 irt_regN, shift_type, imm5, regN );
9290
9291         armSignedSatQ( irt_regN_shift, sat_imm, &irt_result, &irt_sat_Q );
9292         or_into_QFLAG32( mkexpr(irt_sat_Q), condT );
9293
9294         if (isT)
9295            putIRegT( regD, mkexpr(irt_result), condT );
9296         else
9297            putIRegA( regD, mkexpr(irt_result), condT, Ijk_Boring );
9298
9299         DIP( "ssat%s r%u, #0x%04x, %s\n",
9300              nCC(conq), regD, imm5, dis_buf);
9301         return True;
9302     }
9303     /* fall through */
9304   }
9305
9306    /* ----------- ssat16<c> <Rd>,#<imm>,<Rn> ----------- */
9307    {
9308      UInt regD = 99, regN = 99, sat_imm = 99;
9309      Bool gate = False;
9310
9311      if (isT) {
9312         if (INSNT0(15,6) == BITS10(1,1,1,1,0,0,1,1,0,0)
9313             && INSNT0(5,4) == BITS2(1,0)
9314             && INSNT1(15,12) == BITS4(0,0,0,0)
9315             && INSNT1(7,4) == BITS4(0,0,0,0)) {
9316            regD       = INSNT1(11,8);
9317            regN       = INSNT0(3,0);
9318            sat_imm    = INSNT1(3,0) + 1;
9319            if (!isBadRegT(regD) && !isBadRegT(regN))
9320               gate = True;
9321         }
9322      } else {
9323         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,1,0) &&
9324             INSNA(11,4)   == BITS8(1,1,1,1,0,0,1,1)) {
9325            regD       = INSNA(15,12);
9326            regN       = INSNA(3,0);
9327            sat_imm    = INSNA(19,16) + 1;
9328            if (regD != 15 && regN != 15)
9329               gate = True;
9330         }
9331      }
9332
9333      if (gate) {
9334         IRTemp irt_regN    = newTemp(Ity_I32);
9335         IRTemp irt_regN_lo = newTemp(Ity_I32);
9336         IRTemp irt_regN_hi = newTemp(Ity_I32);
9337         IRTemp irt_Q_lo    = newTemp(Ity_I32);
9338         IRTemp irt_Q_hi    = newTemp(Ity_I32);
9339         IRTemp irt_res_lo  = newTemp(Ity_I32);
9340         IRTemp irt_res_hi  = newTemp(Ity_I32);
9341
9342         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9343         assign( irt_regN_lo,
9344                 binop( Iop_Sar32,
9345                        binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9346                        mkU8(16)) );
9347         assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9348
9349         armSignedSatQ( irt_regN_lo, sat_imm, &irt_res_lo, &irt_Q_lo );
9350         or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9351
9352         armSignedSatQ( irt_regN_hi, sat_imm, &irt_res_hi, &irt_Q_hi );
9353         or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9354
9355         IRExpr* ire_result
9356            = binop(Iop_Or32,
9357                    binop(Iop_And32, mkexpr(irt_res_lo), mkU32(0xFFFF)),
9358                    binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)));
9359         if (isT)
9360            putIRegT( regD, ire_result, condT );
9361         else
9362            putIRegA( regD, ire_result, condT, Ijk_Boring );
9363
9364         DIP( "ssat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9365         return True;
9366      }
9367      /* fall through */
9368    }
9369
9370    /* -------------- usat16<c> <Rd>,#<imm4>,<Rn> --------------- */
9371    {
9372      UInt regD = 99, regN = 99, sat_imm = 99;
9373      Bool gate = False;
9374
9375      if (isT) {
9376         if (INSNT0(15,4) == 0xF3A && (INSNT1(15,0) & 0xF0F0) == 0x0000) {
9377            regN = INSNT0(3,0);
9378            regD = INSNT1(11,8);
9379            sat_imm = INSNT1(3,0);
9380            if (!isBadRegT(regD) && !isBadRegT(regN))
9381               gate = True;
9382        }
9383      } else {
9384         if (INSNA(27,20) == BITS8(0,1,1,0,1,1,1,0) &&
9385             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9386             INSNA(7,4)   == BITS4(0,0,1,1)) {
9387            regD    = INSNA(15,12);
9388            regN    = INSNA(3,0);
9389            sat_imm = INSNA(19,16);
9390            if (regD != 15 && regN != 15)
9391               gate = True;
9392         }
9393      }
9394
9395      if (gate) {
9396         IRTemp irt_regN    = newTemp(Ity_I32);
9397         IRTemp irt_regN_lo = newTemp(Ity_I32);
9398         IRTemp irt_regN_hi = newTemp(Ity_I32);
9399         IRTemp irt_Q_lo    = newTemp(Ity_I32);
9400         IRTemp irt_Q_hi    = newTemp(Ity_I32);
9401         IRTemp irt_res_lo  = newTemp(Ity_I32);
9402         IRTemp irt_res_hi  = newTemp(Ity_I32);
9403
9404         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
9405         assign( irt_regN_lo, binop( Iop_Sar32,
9406                                     binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
9407                                     mkU8(16)) );
9408         assign( irt_regN_hi, binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)) );
9409
9410         armUnsignedSatQ( &irt_res_lo, &irt_Q_lo, irt_regN_lo, sat_imm );
9411         or_into_QFLAG32( mkexpr(irt_Q_lo), condT );
9412
9413         armUnsignedSatQ( &irt_res_hi, &irt_Q_hi, irt_regN_hi, sat_imm );
9414         or_into_QFLAG32( mkexpr(irt_Q_hi), condT );
9415
9416         IRExpr* ire_result = binop( Iop_Or32,
9417                                     binop(Iop_Shl32, mkexpr(irt_res_hi), mkU8(16)),
9418                                     mkexpr(irt_res_lo) );
9419
9420         if (isT)
9421            putIRegT( regD, ire_result, condT );
9422         else
9423            putIRegA( regD, ire_result, condT, Ijk_Boring );
9424
9425         DIP( "usat16%s r%u, #0x%04x, r%u\n", nCC(conq), regD, sat_imm, regN );
9426         return True;
9427      }
9428      /* fall through */
9429    }
9430
9431    /* -------------- uadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9432    {
9433      UInt regD = 99, regN = 99, regM = 99;
9434      Bool gate = False;
9435
9436      if (isT) {
9437         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9438            regN = INSNT0(3,0);
9439            regD = INSNT1(11,8);
9440            regM = INSNT1(3,0);
9441            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9442               gate = True;
9443         }
9444      } else {
9445         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9446             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9447             INSNA(7,4)   == BITS4(0,0,0,1)) {
9448            regD = INSNA(15,12);
9449            regN = INSNA(19,16);
9450            regM = INSNA(3,0);
9451            if (regD != 15 && regN != 15 && regM != 15)
9452               gate = True;
9453         }
9454      }
9455
9456      if (gate) {
9457         IRTemp rNt  = newTemp(Ity_I32);
9458         IRTemp rMt  = newTemp(Ity_I32);
9459         IRTemp res  = newTemp(Ity_I32);
9460         IRTemp reso = newTemp(Ity_I32);
9461
9462         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9463         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9464
9465         assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9466         if (isT)
9467            putIRegT( regD, mkexpr(res), condT );
9468         else
9469            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9470
9471         assign(reso, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
9472         set_GE_32_10_from_bits_31_15(reso, condT);
9473
9474         DIP("uadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9475         return True;
9476      }
9477      /* fall through */
9478    }
9479
9480    /* -------------- sadd16<c> <Rd>,<Rn>,<Rm> -------------- */
9481    {
9482      UInt regD = 99, regN = 99, regM = 99;
9483      Bool gate = False;
9484
9485      if (isT) {
9486         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9487            regN = INSNT0(3,0);
9488            regD = INSNT1(11,8);
9489            regM = INSNT1(3,0);
9490            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9491               gate = True;
9492         }
9493      } else {
9494         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9495             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9496             INSNA(7,4)   == BITS4(0,0,0,1)) {
9497            regD = INSNA(15,12);
9498            regN = INSNA(19,16);
9499            regM = INSNA(3,0);
9500            if (regD != 15 && regN != 15 && regM != 15)
9501               gate = True;
9502         }
9503      }
9504
9505      if (gate) {
9506         IRTemp rNt  = newTemp(Ity_I32);
9507         IRTemp rMt  = newTemp(Ity_I32);
9508         IRTemp res  = newTemp(Ity_I32);
9509         IRTemp reso = newTemp(Ity_I32);
9510
9511         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9512         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9513
9514         assign(res, binop(Iop_Add16x2, mkexpr(rNt), mkexpr(rMt)));
9515         if (isT)
9516            putIRegT( regD, mkexpr(res), condT );
9517         else
9518            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9519
9520         assign(reso, unop(Iop_Not32,
9521                           binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt))));
9522         set_GE_32_10_from_bits_31_15(reso, condT);
9523
9524         DIP("sadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9525         return True;
9526      }
9527      /* fall through */
9528    }
9529
9530    /* ---------------- usub16<c> <Rd>,<Rn>,<Rm> ---------------- */
9531    {
9532      UInt regD = 99, regN = 99, regM = 99;
9533      Bool gate = False;
9534
9535      if (isT) {
9536         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9537            regN = INSNT0(3,0);
9538            regD = INSNT1(11,8);
9539            regM = INSNT1(3,0);
9540            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9541               gate = True;
9542         }
9543      } else {
9544         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9545             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9546             INSNA(7,4)   == BITS4(0,1,1,1)) {
9547            regD = INSNA(15,12);
9548            regN = INSNA(19,16);
9549            regM = INSNA(3,0);
9550            if (regD != 15 && regN != 15 && regM != 15)
9551              gate = True;
9552         }
9553      }
9554
9555      if (gate) {
9556         IRTemp rNt  = newTemp(Ity_I32);
9557         IRTemp rMt  = newTemp(Ity_I32);
9558         IRTemp res  = newTemp(Ity_I32);
9559         IRTemp reso = newTemp(Ity_I32);
9560
9561         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9562         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9563
9564         assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9565         if (isT)
9566            putIRegT( regD, mkexpr(res), condT );
9567         else
9568            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9569
9570         assign(reso, unop(Iop_Not32,
9571                           binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt))));
9572         set_GE_32_10_from_bits_31_15(reso, condT);
9573
9574         DIP("usub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9575         return True;
9576      }
9577      /* fall through */
9578    }
9579
9580    /* -------------- ssub16<c> <Rd>,<Rn>,<Rm> -------------- */
9581    {
9582      UInt regD = 99, regN = 99, regM = 99;
9583      Bool gate = False;
9584
9585      if (isT) {
9586         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9587            regN = INSNT0(3,0);
9588            regD = INSNT1(11,8);
9589            regM = INSNT1(3,0);
9590            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9591               gate = True;
9592         }
9593      } else {
9594         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9595             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9596             INSNA(7,4)   == BITS4(0,1,1,1)) {
9597            regD = INSNA(15,12);
9598            regN = INSNA(19,16);
9599            regM = INSNA(3,0);
9600            if (regD != 15 && regN != 15 && regM != 15)
9601               gate = True;
9602         }
9603      }
9604
9605      if (gate) {
9606         IRTemp rNt  = newTemp(Ity_I32);
9607         IRTemp rMt  = newTemp(Ity_I32);
9608         IRTemp res  = newTemp(Ity_I32);
9609         IRTemp reso = newTemp(Ity_I32);
9610
9611         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9612         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9613
9614         assign(res, binop(Iop_Sub16x2, mkexpr(rNt), mkexpr(rMt)));
9615         if (isT)
9616            putIRegT( regD, mkexpr(res), condT );
9617         else
9618            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9619
9620         assign(reso, unop(Iop_Not32,
9621                           binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt))));
9622         set_GE_32_10_from_bits_31_15(reso, condT);
9623
9624         DIP("ssub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9625         return True;
9626      }
9627      /* fall through */
9628    }
9629
9630    /* ----------------- uadd8<c> <Rd>,<Rn>,<Rm> ---------------- */
9631    {
9632      UInt regD = 99, regN = 99, regM = 99;
9633      Bool gate = False;
9634
9635      if (isT) {
9636         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9637            regN = INSNT0(3,0);
9638            regD = INSNT1(11,8);
9639            regM = INSNT1(3,0);
9640            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9641               gate = True;
9642         }
9643      } else {
9644         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9645             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9646             (INSNA(7,4)  == BITS4(1,0,0,1))) {
9647            regD = INSNA(15,12);
9648            regN = INSNA(19,16);
9649            regM = INSNA(3,0);
9650            if (regD != 15 && regN != 15 && regM != 15)
9651               gate = True;
9652         }
9653      }
9654
9655      if (gate) {
9656         IRTemp rNt  = newTemp(Ity_I32);
9657         IRTemp rMt  = newTemp(Ity_I32);
9658         IRTemp res  = newTemp(Ity_I32);
9659         IRTemp reso = newTemp(Ity_I32);
9660
9661         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9662         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9663
9664         assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9665         if (isT)
9666            putIRegT( regD, mkexpr(res), condT );
9667         else
9668            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9669
9670         assign(reso, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9671         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9672
9673         DIP("uadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9674         return True;
9675      }
9676      /* fall through */
9677    }
9678
9679    /* ------------------- sadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9680    {
9681      UInt regD = 99, regN = 99, regM = 99;
9682      Bool gate = False;
9683
9684      if (isT) {
9685         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9686            regN = INSNT0(3,0);
9687            regD = INSNT1(11,8);
9688            regM = INSNT1(3,0);
9689            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9690               gate = True;
9691         }
9692      } else {
9693         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9694             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9695             (INSNA(7,4)  == BITS4(1,0,0,1))) {
9696            regD = INSNA(15,12);
9697            regN = INSNA(19,16);
9698            regM = INSNA(3,0);
9699            if (regD != 15 && regN != 15 && regM != 15)
9700               gate = True;
9701         }
9702      }
9703
9704      if (gate) {
9705         IRTemp rNt  = newTemp(Ity_I32);
9706         IRTemp rMt  = newTemp(Ity_I32);
9707         IRTemp res  = newTemp(Ity_I32);
9708         IRTemp reso = newTemp(Ity_I32);
9709
9710         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9711         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9712
9713         assign(res, binop(Iop_Add8x4, mkexpr(rNt), mkexpr(rMt)));
9714         if (isT)
9715            putIRegT( regD, mkexpr(res), condT );
9716         else
9717            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9718
9719         assign(reso, unop(Iop_Not32,
9720                           binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt))));
9721         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9722
9723         DIP("sadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9724         return True;
9725      }
9726      /* fall through */
9727    }
9728
9729    /* ------------------- usub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9730    {
9731      UInt regD = 99, regN = 99, regM = 99;
9732      Bool gate = False;
9733
9734      if (isT) {
9735         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
9736            regN = INSNT0(3,0);
9737            regD = INSNT1(11,8);
9738            regM = INSNT1(3,0);
9739            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9740               gate = True;
9741         }
9742      } else {
9743         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
9744             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9745             (INSNA(7,4)  == BITS4(1,1,1,1))) {
9746            regD = INSNA(15,12);
9747            regN = INSNA(19,16);
9748            regM = INSNA(3,0);
9749            if (regD != 15 && regN != 15 && regM != 15)
9750              gate = True;
9751         }
9752      }
9753
9754      if (gate) {
9755         IRTemp rNt  = newTemp(Ity_I32);
9756         IRTemp rMt  = newTemp(Ity_I32);
9757         IRTemp res  = newTemp(Ity_I32);
9758         IRTemp reso = newTemp(Ity_I32);
9759
9760         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9761         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9762
9763         assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9764         if (isT)
9765            putIRegT( regD, mkexpr(res), condT );
9766         else
9767            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9768
9769         assign(reso, unop(Iop_Not32,
9770                           binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt))));
9771         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9772
9773         DIP("usub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9774         return True;
9775      }
9776      /* fall through */
9777    }
9778
9779    /* ------------------- ssub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9780    {
9781      UInt regD = 99, regN = 99, regM = 99;
9782      Bool gate = False;
9783
9784      if (isT) {
9785         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
9786            regN = INSNT0(3,0);
9787            regD = INSNT1(11,8);
9788            regM = INSNT1(3,0);
9789            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9790               gate = True;
9791         }
9792      } else {
9793         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
9794             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9795             INSNA(7,4)   == BITS4(1,1,1,1)) {
9796            regD = INSNA(15,12);
9797            regN = INSNA(19,16);
9798            regM = INSNA(3,0);
9799            if (regD != 15 && regN != 15 && regM != 15)
9800               gate = True;
9801         }
9802      }
9803
9804      if (gate) {
9805         IRTemp rNt  = newTemp(Ity_I32);
9806         IRTemp rMt  = newTemp(Ity_I32);
9807         IRTemp res  = newTemp(Ity_I32);
9808         IRTemp reso = newTemp(Ity_I32);
9809
9810         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9811         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9812
9813         assign(res, binop(Iop_Sub8x4, mkexpr(rNt), mkexpr(rMt)));
9814         if (isT)
9815            putIRegT( regD, mkexpr(res), condT );
9816         else
9817            putIRegA( regD, mkexpr(res), condT, Ijk_Boring );
9818
9819         assign(reso, unop(Iop_Not32,
9820                           binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt))));
9821         set_GE_3_2_1_0_from_bits_31_23_15_7(reso, condT);
9822
9823         DIP("ssub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9824         return True;
9825      }
9826      /* fall through */
9827    }
9828
9829    /* ------------------ qadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
9830    {
9831      UInt regD = 99, regN = 99, regM = 99;
9832      Bool gate = False;
9833
9834      if (isT) {
9835         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9836            regN = INSNT0(3,0);
9837            regD = INSNT1(11,8);
9838            regM = INSNT1(3,0);
9839            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9840               gate = True;
9841         }
9842      } else {
9843         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9844             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9845             INSNA(7,4)   == BITS4(1,0,0,1)) {
9846            regD = INSNA(15,12);
9847            regN = INSNA(19,16);
9848            regM = INSNA(3,0);
9849            if (regD != 15 && regN != 15 && regM != 15)
9850               gate = True;
9851         }
9852      }
9853
9854      if (gate) {
9855         IRTemp rNt   = newTemp(Ity_I32);
9856         IRTemp rMt   = newTemp(Ity_I32);
9857         IRTemp res_q = newTemp(Ity_I32);
9858
9859         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9860         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9861
9862         assign(res_q, binop(Iop_QAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
9863         if (isT)
9864            putIRegT( regD, mkexpr(res_q), condT );
9865         else
9866            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9867
9868         DIP("qadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9869         return True;
9870      }
9871      /* fall through */
9872    }
9873
9874    /* ------------------ qsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
9875    {
9876      UInt regD = 99, regN = 99, regM = 99;
9877      Bool gate = False;
9878
9879      if (isT) {
9880         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
9881            regN = INSNT0(3,0);
9882            regD = INSNT1(11,8);
9883            regM = INSNT1(3,0);
9884            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9885               gate = True;
9886         }
9887      } else {
9888         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
9889             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9890             INSNA(7,4)   == BITS4(1,1,1,1)) {
9891            regD = INSNA(15,12);
9892            regN = INSNA(19,16);
9893            regM = INSNA(3,0);
9894            if (regD != 15 && regN != 15 && regM != 15)
9895               gate = True;
9896         }
9897      }
9898
9899      if (gate) {
9900         IRTemp rNt   = newTemp(Ity_I32);
9901         IRTemp rMt   = newTemp(Ity_I32);
9902         IRTemp res_q = newTemp(Ity_I32);
9903
9904         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9905         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9906
9907         assign(res_q, binop(Iop_QSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
9908         if (isT)
9909            putIRegT( regD, mkexpr(res_q), condT );
9910         else
9911            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9912
9913         DIP("qsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9914         return True;
9915      }
9916      /* fall through */
9917    }
9918
9919    /* ------------------ uqadd8<c> <Rd>,<Rn>,<Rm> ------------------ */
9920    {
9921      UInt regD = 99, regN = 99, regM = 99;
9922      Bool gate = False;
9923
9924      if (isT) {
9925         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9926            regN = INSNT0(3,0);
9927            regD = INSNT1(11,8);
9928            regM = INSNT1(3,0);
9929            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9930               gate = True;
9931         }
9932      } else {
9933         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9934             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9935             (INSNA(7,4)  == BITS4(1,0,0,1))) {
9936            regD = INSNA(15,12);
9937            regN = INSNA(19,16);
9938            regM = INSNA(3,0);
9939            if (regD != 15 && regN != 15 && regM != 15)
9940               gate = True;
9941         }
9942      }
9943
9944      if (gate) {
9945         IRTemp rNt   = newTemp(Ity_I32);
9946         IRTemp rMt   = newTemp(Ity_I32);
9947         IRTemp res_q = newTemp(Ity_I32);
9948
9949         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9950         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9951
9952         assign(res_q, binop(Iop_QAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
9953         if (isT)
9954            putIRegT( regD, mkexpr(res_q), condT );
9955         else
9956            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
9957
9958         DIP("uqadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
9959         return True;
9960      }
9961      /* fall through */
9962    }
9963
9964    /* ------------------ uqsub8<c> <Rd>,<Rn>,<Rm> ------------------ */
9965    {
9966      UInt regD = 99, regN = 99, regM = 99;
9967      Bool gate = False;
9968
9969      if (isT) {
9970         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
9971            regN = INSNT0(3,0);
9972            regD = INSNT1(11,8);
9973            regM = INSNT1(3,0);
9974            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
9975               gate = True;
9976         }
9977      } else {
9978         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
9979             INSNA(11,8)  == BITS4(1,1,1,1)         &&
9980             (INSNA(7,4)  == BITS4(1,1,1,1))) {
9981            regD = INSNA(15,12);
9982            regN = INSNA(19,16);
9983            regM = INSNA(3,0);
9984            if (regD != 15 && regN != 15 && regM != 15)
9985              gate = True;
9986         }
9987      }
9988
9989      if (gate) {
9990         IRTemp rNt   = newTemp(Ity_I32);
9991         IRTemp rMt   = newTemp(Ity_I32);
9992         IRTemp res_q = newTemp(Ity_I32);
9993
9994         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
9995         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
9996
9997         assign(res_q, binop(Iop_QSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
9998         if (isT)
9999            putIRegT( regD, mkexpr(res_q), condT );
10000         else
10001            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10002
10003         DIP("uqsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10004         return True;
10005      }
10006      /* fall through */
10007    }
10008
10009    /* ----------------- uhadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10010    {
10011      UInt regD = 99, regN = 99, regM = 99;
10012      Bool gate = False;
10013
10014      if (isT) {
10015         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
10016            regN = INSNT0(3,0);
10017            regD = INSNT1(11,8);
10018            regM = INSNT1(3,0);
10019            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10020               gate = True;
10021         }
10022      } else {
10023         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10024             INSNA(11,8)  == BITS4(1,1,1,1)         &&
10025             INSNA(7,4)   == BITS4(1,0,0,1)) {
10026            regD = INSNA(15,12);
10027            regN = INSNA(19,16);
10028            regM = INSNA(3,0);
10029            if (regD != 15 && regN != 15 && regM != 15)
10030               gate = True;
10031         }
10032      }
10033
10034      if (gate) {
10035         IRTemp rNt   = newTemp(Ity_I32);
10036         IRTemp rMt   = newTemp(Ity_I32);
10037         IRTemp res_q = newTemp(Ity_I32);
10038
10039         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10040         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10041
10042         assign(res_q, binop(Iop_HAdd8Ux4, mkexpr(rNt), mkexpr(rMt)));
10043         if (isT)
10044            putIRegT( regD, mkexpr(res_q), condT );
10045         else
10046            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10047
10048         DIP("uhadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10049         return True;
10050      }
10051      /* fall through */
10052    }
10053
10054    /* ----------------- uhadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
10055    {
10056      UInt regD = 99, regN = 99, regM = 99;
10057      Bool gate = False;
10058
10059      if (isT) {
10060         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
10061            regN = INSNT0(3,0);
10062            regD = INSNT1(11,8);
10063            regM = INSNT1(3,0);
10064            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10065               gate = True;
10066         }
10067      } else {
10068         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
10069             INSNA(11,8)  == BITS4(1,1,1,1)         &&
10070             INSNA(7,4)   == BITS4(0,0,0,1)) {
10071            regD = INSNA(15,12);
10072            regN = INSNA(19,16);
10073            regM = INSNA(3,0);
10074            if (regD != 15 && regN != 15 && regM != 15)
10075               gate = True;
10076         }
10077      }
10078
10079      if (gate) {
10080         IRTemp rNt   = newTemp(Ity_I32);
10081         IRTemp rMt   = newTemp(Ity_I32);
10082         IRTemp res_q = newTemp(Ity_I32);
10083
10084         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10085         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10086
10087         assign(res_q, binop(Iop_HAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
10088         if (isT)
10089            putIRegT( regD, mkexpr(res_q), condT );
10090         else
10091            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10092
10093         DIP("uhadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10094         return True;
10095      }
10096      /* fall through */
10097    }
10098
10099    /* ----------------- shadd8<c> <Rd>,<Rn>,<Rm> ------------------- */
10100    {
10101      UInt regD = 99, regN = 99, regM = 99;
10102      Bool gate = False;
10103
10104      if (isT) {
10105         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
10106            regN = INSNT0(3,0);
10107            regD = INSNT1(11,8);
10108            regM = INSNT1(3,0);
10109            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10110               gate = True;
10111         }
10112      } else {
10113         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
10114             INSNA(11,8)  == BITS4(1,1,1,1)         &&
10115             INSNA(7,4)   == BITS4(1,0,0,1)) {
10116            regD = INSNA(15,12);
10117            regN = INSNA(19,16);
10118            regM = INSNA(3,0);
10119            if (regD != 15 && regN != 15 && regM != 15)
10120               gate = True;
10121         }
10122      }
10123
10124      if (gate) {
10125         IRTemp rNt   = newTemp(Ity_I32);
10126         IRTemp rMt   = newTemp(Ity_I32);
10127         IRTemp res_q = newTemp(Ity_I32);
10128
10129         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10130         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10131
10132         assign(res_q, binop(Iop_HAdd8Sx4, mkexpr(rNt), mkexpr(rMt)));
10133         if (isT)
10134            putIRegT( regD, mkexpr(res_q), condT );
10135         else
10136            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10137
10138         DIP("shadd8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10139         return True;
10140      }
10141      /* fall through */
10142    }
10143
10144    /* ------------------ qadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
10145    {
10146      UInt regD = 99, regN = 99, regM = 99;
10147      Bool gate = False;
10148
10149      if (isT) {
10150         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10151            regN = INSNT0(3,0);
10152            regD = INSNT1(11,8);
10153            regM = INSNT1(3,0);
10154            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10155               gate = True;
10156         }
10157      } else {
10158         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10159             INSNA(11,8)  == BITS4(1,1,1,1)         &&
10160             INSNA(7,4)   == BITS4(0,0,0,1)) {
10161            regD = INSNA(15,12);
10162            regN = INSNA(19,16);
10163            regM = INSNA(3,0);
10164            if (regD != 15 && regN != 15 && regM != 15)
10165               gate = True;
10166         }
10167      }
10168
10169      if (gate) {
10170         IRTemp rNt   = newTemp(Ity_I32);
10171         IRTemp rMt   = newTemp(Ity_I32);
10172         IRTemp res_q = newTemp(Ity_I32);
10173
10174         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10175         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10176
10177         assign(res_q, binop(Iop_QAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
10178         if (isT)
10179            putIRegT( regD, mkexpr(res_q), condT );
10180         else
10181            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10182
10183         DIP("qadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10184         return True;
10185      }
10186      /* fall through */
10187    }
10188
10189    /* ------------------ qsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
10190    {
10191      UInt regD = 99, regN = 99, regM = 99;
10192      Bool gate = False;
10193
10194       if (isT) {
10195         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10196            regN = INSNT0(3,0);
10197            regD = INSNT1(11,8);
10198            regM = INSNT1(3,0);
10199            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10200               gate = True;
10201         }
10202      } else {
10203         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10204             INSNA(11,8)  == BITS4(1,1,1,1)         &&
10205             INSNA(7,4)   == BITS4(0,1,1,1)) {
10206            regD = INSNA(15,12);
10207            regN = INSNA(19,16);
10208            regM = INSNA(3,0);
10209            if (regD != 15 && regN != 15 && regM != 15)
10210              gate = True;
10211         }
10212      }
10213
10214      if (gate) {
10215         IRTemp rNt   = newTemp(Ity_I32);
10216         IRTemp rMt   = newTemp(Ity_I32);
10217         IRTemp res_q = newTemp(Ity_I32);
10218
10219         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
10220         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
10221
10222         assign(res_q, binop(Iop_QSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
10223         if (isT)
10224            putIRegT( regD, mkexpr(res_q), condT );
10225         else
10226            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
10227
10228         DIP("qsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
10229         return True;
10230      }
10231      /* fall through */
10232    }
10233
10234    /* ------------------- qsax<c> <Rd>,<Rn>,<Rm> ------------------- */
10235    /* note: the hardware seems to construct the result differently
10236       from wot the manual says. */
10237    {
10238      UInt regD = 99, regN = 99, regM = 99;
10239      Bool gate = False;
10240
10241      if (isT) {
10242         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10243            regN = INSNT0(3,0);
10244            regD = INSNT1(11,8);
10245            regM = INSNT1(3,0);
10246            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10247               gate = True;
10248         }
10249      } else {
10250         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10251             INSNA(11,8)  == BITS4(1,1,1,1)         &&
10252             INSNA(7,4)   == BITS4(0,1,0,1)) {
10253            regD = INSNA(15,12);
10254            regN = INSNA(19,16);
10255            regM = INSNA(3,0);
10256            if (regD != 15 && regN != 15 && regM != 15)
10257               gate = True;
10258         }
10259      }
10260
10261      if (gate) {
10262         IRTemp irt_regN     = newTemp(Ity_I32);
10263         IRTemp irt_regM     = newTemp(Ity_I32);
10264         IRTemp irt_sum      = newTemp(Ity_I32);
10265         IRTemp irt_diff     = newTemp(Ity_I32);
10266         IRTemp irt_sum_res  = newTemp(Ity_I32);
10267         IRTemp irt_diff_res = newTemp(Ity_I32);
10268
10269         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10270         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10271
10272         assign( irt_diff,
10273                 binop( Iop_Sub32,
10274                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10275                        binop( Iop_Sar32,
10276                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10277                               mkU8(16) ) ) );
10278         armSignedSatQ( irt_diff, 0x10, &irt_diff_res, NULL);
10279
10280         assign( irt_sum,
10281                 binop( Iop_Add32,
10282                        binop( Iop_Sar32,
10283                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10284                               mkU8(16) ),
10285                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) )) );
10286         armSignedSatQ( irt_sum, 0x10, &irt_sum_res, NULL );
10287
10288         IRExpr* ire_result = binop( Iop_Or32,
10289                                     binop( Iop_Shl32, mkexpr(irt_diff_res),
10290                                            mkU8(16) ),
10291                                     binop( Iop_And32, mkexpr(irt_sum_res),
10292                                            mkU32(0xFFFF)) );
10293
10294         if (isT)
10295            putIRegT( regD, ire_result, condT );
10296         else
10297            putIRegA( regD, ire_result, condT, Ijk_Boring );
10298
10299         DIP( "qsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10300         return True;
10301      }
10302      /* fall through */
10303    }
10304
10305    /* ------------------- qasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10306    {
10307      UInt regD = 99, regN = 99, regM = 99;
10308      Bool gate = False;
10309
10310      if (isT) {
10311         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF010) {
10312            regN = INSNT0(3,0);
10313            regD = INSNT1(11,8);
10314            regM = INSNT1(3,0);
10315            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10316               gate = True;
10317         }
10318      } else {
10319         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,0) &&
10320             INSNA(11,8)  == BITS4(1,1,1,1)         &&
10321             INSNA(7,4)   == BITS4(0,0,1,1)) {
10322            regD = INSNA(15,12);
10323            regN = INSNA(19,16);
10324            regM = INSNA(3,0);
10325            if (regD != 15 && regN != 15 && regM != 15)
10326               gate = True;
10327         }
10328      }
10329
10330      if (gate) {
10331         IRTemp irt_regN     = newTemp(Ity_I32);
10332         IRTemp irt_regM     = newTemp(Ity_I32);
10333         IRTemp irt_sum      = newTemp(Ity_I32);
10334         IRTemp irt_diff     = newTemp(Ity_I32);
10335         IRTemp irt_res_sum  = newTemp(Ity_I32);
10336         IRTemp irt_res_diff = newTemp(Ity_I32);
10337
10338         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10339         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10340
10341         assign( irt_diff,
10342                 binop( Iop_Sub32,
10343                        binop( Iop_Sar32,
10344                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10345                               mkU8(16) ),
10346                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10347         armSignedSatQ( irt_diff, 0x10, &irt_res_diff, NULL );
10348
10349         assign( irt_sum,
10350                 binop( Iop_Add32,
10351                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10352                        binop( Iop_Sar32,
10353                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10354                               mkU8(16) ) ) );
10355         armSignedSatQ( irt_sum, 0x10, &irt_res_sum, NULL );
10356
10357         IRExpr* ire_result
10358           = binop( Iop_Or32,
10359                    binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
10360                    binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
10361
10362         if (isT)
10363            putIRegT( regD, ire_result, condT );
10364         else
10365            putIRegA( regD, ire_result, condT, Ijk_Boring );
10366
10367         DIP( "qasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10368         return True;
10369      }
10370      /* fall through */
10371    }
10372
10373    /* ------------------- sasx<c> <Rd>,<Rn>,<Rm> ------------------- */
10374    {
10375      UInt regD = 99, regN = 99, regM = 99;
10376      Bool gate = False;
10377
10378      if (isT) {
10379         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
10380            regN = INSNT0(3,0);
10381            regD = INSNT1(11,8);
10382            regM = INSNT1(3,0);
10383            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10384               gate = True;
10385         }
10386      } else {
10387         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
10388             INSNA(11,8)  == BITS4(1,1,1,1)         &&
10389             INSNA(7,4)   == BITS4(0,0,1,1)) {
10390            regD = INSNA(15,12);
10391            regN = INSNA(19,16);
10392            regM = INSNA(3,0);
10393            if (regD != 15 && regN != 15 && regM != 15)
10394               gate = True;
10395         }
10396      }
10397
10398      if (gate) {
10399         IRTemp irt_regN = newTemp(Ity_I32);
10400         IRTemp irt_regM = newTemp(Ity_I32);
10401         IRTemp irt_sum  = newTemp(Ity_I32);
10402         IRTemp irt_diff = newTemp(Ity_I32);
10403
10404         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10405         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10406
10407         assign( irt_diff,
10408                 binop( Iop_Sub32,
10409                        binop( Iop_Sar32,
10410                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10411                               mkU8(16) ),
10412                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10413
10414         assign( irt_sum,
10415                 binop( Iop_Add32,
10416                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10417                        binop( Iop_Sar32,
10418                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10419                               mkU8(16) ) ) );
10420
10421         IRExpr* ire_result
10422           = binop( Iop_Or32,
10423                    binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
10424                    binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
10425
10426         IRTemp ge10 = newTemp(Ity_I32);
10427         assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
10428         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
10429         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
10430
10431         IRTemp ge32 = newTemp(Ity_I32);
10432         assign(ge32, unop(Iop_Not32, mkexpr(irt_sum)));
10433         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
10434         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
10435
10436         if (isT)
10437            putIRegT( regD, ire_result, condT );
10438         else
10439            putIRegA( regD, ire_result, condT, Ijk_Boring );
10440
10441         DIP( "sasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10442         return True;
10443      }
10444      /* fall through */
10445    }
10446
10447    /* --------------- smuad, smuadx<c><Rd>,<Rn>,<Rm> --------------- */
10448    /* --------------- smsad, smsadx<c><Rd>,<Rn>,<Rm> --------------- */
10449    {
10450      UInt regD = 99, regN = 99, regM = 99, bitM = 99;
10451      Bool gate = False, isAD = False;
10452
10453      if (isT) {
10454         if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10455             && (INSNT1(15,0) & 0xF0E0) == 0xF000) {
10456            regN = INSNT0(3,0);
10457            regD = INSNT1(11,8);
10458            regM = INSNT1(3,0);
10459            bitM = INSNT1(4,4);
10460            isAD = INSNT0(15,4) == 0xFB2;
10461            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10462               gate = True;
10463         }
10464      } else {
10465         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10466             INSNA(15,12) == BITS4(1,1,1,1)         &&
10467             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1) ) {
10468            regD = INSNA(19,16);
10469            regN = INSNA(3,0);
10470            regM = INSNA(11,8);
10471            bitM = INSNA(5,5);
10472            isAD = INSNA(6,6) == 0;
10473            if (regD != 15 && regN != 15 && regM != 15)
10474               gate = True;
10475         }
10476      }
10477
10478      if (gate) {
10479         IRTemp irt_regN    = newTemp(Ity_I32);
10480         IRTemp irt_regM    = newTemp(Ity_I32);
10481         IRTemp irt_prod_lo = newTemp(Ity_I32);
10482         IRTemp irt_prod_hi = newTemp(Ity_I32);
10483         IRTemp tmpM        = newTemp(Ity_I32);
10484
10485         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10486
10487         assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10488         assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10489
10490         assign( irt_prod_lo,
10491                 binop( Iop_Mul32,
10492                        binop( Iop_Sar32,
10493                               binop(Iop_Shl32, mkexpr(irt_regN), mkU8(16)),
10494                               mkU8(16) ),
10495                        binop( Iop_Sar32,
10496                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
10497                               mkU8(16) ) ) );
10498         assign( irt_prod_hi, binop(Iop_Mul32,
10499                                    binop(Iop_Sar32, mkexpr(irt_regN), mkU8(16)),
10500                                    binop(Iop_Sar32, mkexpr(irt_regM), mkU8(16))) );
10501         IRExpr* ire_result
10502            = binop( isAD ? Iop_Add32 : Iop_Sub32,
10503                     mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) );
10504
10505         if (isT)
10506            putIRegT( regD, ire_result, condT );
10507         else
10508            putIRegA( regD, ire_result, condT, Ijk_Boring );
10509
10510         if (isAD) {
10511            or_into_QFLAG32(
10512               signed_overflow_after_Add32( ire_result,
10513                                            irt_prod_lo, irt_prod_hi ),
10514               condT
10515            );
10516         }
10517
10518         DIP("smu%cd%s%s r%u, r%u, r%u\n",
10519             isAD ? 'a' : 's',
10520             bitM ? "x" : "", nCC(conq), regD, regN, regM);
10521         return True;
10522      }
10523      /* fall through */
10524    }
10525
10526    /* --------------- smlad{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10527    /* --------------- smlsd{X}<c> <Rd>,<Rn>,<Rm>,<Ra> -------------- */
10528    {
10529      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10530      Bool gate = False, isAD = False;
10531
10532      if (isT) {
10533        if ((INSNT0(15,4) == 0xFB2 || INSNT0(15,4) == 0xFB4)
10534            && INSNT1(7,5) == BITS3(0,0,0)) {
10535            regN = INSNT0(3,0);
10536            regD = INSNT1(11,8);
10537            regM = INSNT1(3,0);
10538            regA = INSNT1(15,12);
10539            bitM = INSNT1(4,4);
10540            isAD = INSNT0(15,4) == 0xFB2;
10541            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10542                && !isBadRegT(regA))
10543               gate = True;
10544         }
10545      } else {
10546         if (INSNA(27,20) == BITS8(0,1,1,1,0,0,0,0) &&
10547             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
10548            regD = INSNA(19,16);
10549            regA = INSNA(15,12);
10550            regN = INSNA(3,0);
10551            regM = INSNA(11,8);
10552            bitM = INSNA(5,5);
10553            isAD = INSNA(6,6) == 0;
10554            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10555               gate = True;
10556         }
10557      }
10558
10559      if (gate) {
10560         IRTemp irt_regN    = newTemp(Ity_I32);
10561         IRTemp irt_regM    = newTemp(Ity_I32);
10562         IRTemp irt_regA    = newTemp(Ity_I32);
10563         IRTemp irt_prod_lo = newTemp(Ity_I32);
10564         IRTemp irt_prod_hi = newTemp(Ity_I32);
10565         IRTemp irt_sum     = newTemp(Ity_I32);
10566         IRTemp tmpM        = newTemp(Ity_I32);
10567
10568         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10569         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10570
10571         assign( tmpM, isT ? getIRegT(regM) : getIRegA(regM) );
10572         assign( irt_regM, genROR32(tmpM, (bitM & 1) ? 16 : 0) );
10573
10574         assign( irt_prod_lo,
10575                 binop(Iop_Mul32,
10576                       binop(Iop_Sar32,
10577                             binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
10578                             mkU8(16)),
10579                       binop(Iop_Sar32,
10580                             binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
10581                             mkU8(16))) );
10582         assign( irt_prod_hi,
10583                 binop( Iop_Mul32,
10584                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
10585                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
10586         assign( irt_sum, binop( isAD ? Iop_Add32 : Iop_Sub32,
10587                                 mkexpr(irt_prod_lo), mkexpr(irt_prod_hi) ) );
10588
10589         IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_sum), mkexpr(irt_regA));
10590
10591         if (isT)
10592            putIRegT( regD, ire_result, condT );
10593         else
10594            putIRegA( regD, ire_result, condT, Ijk_Boring );
10595
10596         if (isAD) {
10597            or_into_QFLAG32(
10598               signed_overflow_after_Add32( mkexpr(irt_sum),
10599                                            irt_prod_lo, irt_prod_hi ),
10600               condT
10601            );
10602         }
10603
10604         or_into_QFLAG32(
10605            signed_overflow_after_Add32( ire_result, irt_sum, irt_regA ),
10606            condT
10607         );
10608
10609         DIP("sml%cd%s%s r%u, r%u, r%u, r%u\n",
10610             isAD ? 'a' : 's',
10611             bitM ? "x" : "", nCC(conq), regD, regN, regM, regA);
10612         return True;
10613      }
10614      /* fall through */
10615    }
10616
10617    /* ----- smlabb, smlabt, smlatb, smlatt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10618    {
10619      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99, bitN = 99;
10620      Bool gate = False;
10621
10622      if (isT) {
10623         if (INSNT0(15,4) == 0xFB1 && INSNT1(7,6) == BITS2(0,0)) {
10624            regN = INSNT0(3,0);
10625            regD = INSNT1(11,8);
10626            regM = INSNT1(3,0);
10627            regA = INSNT1(15,12);
10628            bitM = INSNT1(4,4);
10629            bitN = INSNT1(5,5);
10630            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10631                && !isBadRegT(regA))
10632               gate = True;
10633         }
10634      } else {
10635         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
10636             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10637            regD = INSNA(19,16);
10638            regN = INSNA(3,0);
10639            regM = INSNA(11,8);
10640            regA = INSNA(15,12);
10641            bitM = INSNA(6,6);
10642            bitN = INSNA(5,5);
10643            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10644               gate = True;
10645         }
10646      }
10647
10648      if (gate) {
10649         IRTemp irt_regA = newTemp(Ity_I32);
10650         IRTemp irt_prod = newTemp(Ity_I32);
10651
10652         assign( irt_prod,
10653                 binop(Iop_Mul32,
10654                       binop(Iop_Sar32,
10655                             binop(Iop_Shl32,
10656                                   isT ? getIRegT(regN) : getIRegA(regN),
10657                                   mkU8(bitN ? 0 : 16)),
10658                             mkU8(16)),
10659                       binop(Iop_Sar32,
10660                             binop(Iop_Shl32,
10661                                   isT ? getIRegT(regM) : getIRegA(regM),
10662                                   mkU8(bitM ? 0 : 16)),
10663                             mkU8(16))) );
10664
10665         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10666
10667         IRExpr* ire_result = binop(Iop_Add32, mkexpr(irt_prod), mkexpr(irt_regA));
10668
10669         if (isT)
10670            putIRegT( regD, ire_result, condT );
10671         else
10672            putIRegA( regD, ire_result, condT, Ijk_Boring );
10673
10674         or_into_QFLAG32(
10675            signed_overflow_after_Add32( ire_result, irt_prod, irt_regA ),
10676            condT
10677         );
10678
10679         DIP( "smla%c%c%s r%u, r%u, r%u, r%u\n",
10680              bitN ? 't' : 'b', bitM ? 't' : 'b',
10681              nCC(conq), regD, regN, regM, regA );
10682         return True;
10683      }
10684      /* fall through */
10685    }
10686
10687    /* ----- smlalbb, smlalbt, smlaltb, smlaltt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10688    {
10689      UInt regDHi = 99, regN = 99, regM = 99, regDLo = 99, bitM = 99, bitN = 99;
10690      Bool gate = False;
10691
10692      if (isT) {
10693         if (INSNT0(15,4) == 0xFBC && INSNT1(7,6) == BITS2(1,0)) {
10694            regN   = INSNT0(3,0);
10695            regDHi = INSNT1(11,8);
10696            regM   = INSNT1(3,0);
10697            regDLo = INSNT1(15,12);
10698            bitM   = INSNT1(4,4);
10699            bitN   = INSNT1(5,5);
10700            if (!isBadRegT(regDHi) && !isBadRegT(regN) && !isBadRegT(regM)
10701                && !isBadRegT(regDLo) && regDHi != regDLo)
10702               gate = True;
10703         }
10704      } else {
10705         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
10706             (INSNA(7,4) & BITS4(1,0,0,1)) == BITS4(1,0,0,0)) {
10707            regDHi = INSNA(19,16);
10708            regN   = INSNA(3,0);
10709            regM   = INSNA(11,8);
10710            regDLo = INSNA(15,12);
10711            bitM   = INSNA(6,6);
10712            bitN   = INSNA(5,5);
10713            if (regDHi != 15 && regN != 15 && regM != 15 && regDLo != 15 &&
10714                regDHi != regDLo)
10715               gate = True;
10716         }
10717      }
10718
10719      if (gate) {
10720         IRTemp irt_regD  = newTemp(Ity_I64);
10721         IRTemp irt_prod  = newTemp(Ity_I64);
10722         IRTemp irt_res   = newTemp(Ity_I64);
10723         IRTemp irt_resHi = newTemp(Ity_I32);
10724         IRTemp irt_resLo = newTemp(Ity_I32);
10725
10726         assign( irt_prod,
10727                 binop(Iop_MullS32,
10728                       binop(Iop_Sar32,
10729                             binop(Iop_Shl32,
10730                                   isT ? getIRegT(regN) : getIRegA(regN),
10731                                   mkU8(bitN ? 0 : 16)),
10732                             mkU8(16)),
10733                       binop(Iop_Sar32,
10734                             binop(Iop_Shl32,
10735                                   isT ? getIRegT(regM) : getIRegA(regM),
10736                                   mkU8(bitM ? 0 : 16)),
10737                             mkU8(16))) );
10738
10739         assign( irt_regD, binop(Iop_32HLto64,
10740                                 isT ? getIRegT(regDHi) : getIRegA(regDHi),
10741                                 isT ? getIRegT(regDLo) : getIRegA(regDLo)) );
10742         assign( irt_res, binop(Iop_Add64, mkexpr(irt_regD), mkexpr(irt_prod)) );
10743         assign( irt_resHi, unop(Iop_64HIto32, mkexpr(irt_res)) );
10744         assign( irt_resLo, unop(Iop_64to32, mkexpr(irt_res)) );
10745
10746         if (isT) {
10747            putIRegT( regDHi, mkexpr(irt_resHi), condT );
10748            putIRegT( regDLo, mkexpr(irt_resLo), condT );
10749         } else {
10750            putIRegA( regDHi, mkexpr(irt_resHi), condT, Ijk_Boring );
10751            putIRegA( regDLo, mkexpr(irt_resLo), condT, Ijk_Boring );
10752         }
10753
10754         DIP( "smlal%c%c%s r%u, r%u, r%u, r%u\n",
10755              bitN ? 't' : 'b', bitM ? 't' : 'b',
10756              nCC(conq), regDHi, regN, regM, regDLo );
10757         return True;
10758      }
10759      /* fall through */
10760    }
10761
10762    /* ----- smlawb, smlawt <Rd>,<Rn>,<Rm>,<Ra> ----- */
10763    {
10764      UInt regD = 99, regN = 99, regM = 99, regA = 99, bitM = 99;
10765      Bool gate = False;
10766
10767      if (isT) {
10768         if (INSNT0(15,4) == 0xFB3 && INSNT1(7,5) == BITS3(0,0,0)) {
10769            regN = INSNT0(3,0);
10770            regD = INSNT1(11,8);
10771            regM = INSNT1(3,0);
10772            regA = INSNT1(15,12);
10773            bitM = INSNT1(4,4);
10774            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM)
10775                && !isBadRegT(regA))
10776               gate = True;
10777         }
10778      } else {
10779         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
10780             (INSNA(7,4) & BITS4(1,0,1,1)) == BITS4(1,0,0,0)) {
10781            regD = INSNA(19,16);
10782            regN = INSNA(3,0);
10783            regM = INSNA(11,8);
10784            regA = INSNA(15,12);
10785            bitM = INSNA(6,6);
10786            if (regD != 15 && regN != 15 && regM != 15 && regA != 15)
10787               gate = True;
10788         }
10789      }
10790
10791      if (gate) {
10792         IRTemp irt_regA = newTemp(Ity_I32);
10793         IRTemp irt_prod = newTemp(Ity_I64);
10794
10795         assign( irt_prod,
10796                 binop(Iop_MullS32,
10797                       isT ? getIRegT(regN) : getIRegA(regN),
10798                       binop(Iop_Sar32,
10799                             binop(Iop_Shl32,
10800                                   isT ? getIRegT(regM) : getIRegA(regM),
10801                                   mkU8(bitM ? 0 : 16)),
10802                             mkU8(16))) );
10803
10804         assign( irt_regA, isT ? getIRegT(regA) : getIRegA(regA) );
10805
10806         IRTemp prod32 = newTemp(Ity_I32);
10807         assign(prod32,
10808                binop(Iop_Or32,
10809                      binop(Iop_Shl32, unop(Iop_64HIto32, mkexpr(irt_prod)), mkU8(16)),
10810                      binop(Iop_Shr32, unop(Iop_64to32, mkexpr(irt_prod)), mkU8(16))
10811         ));
10812
10813         IRExpr* ire_result = binop(Iop_Add32, mkexpr(prod32), mkexpr(irt_regA));
10814
10815         if (isT)
10816            putIRegT( regD, ire_result, condT );
10817         else
10818            putIRegA( regD, ire_result, condT, Ijk_Boring );
10819
10820         or_into_QFLAG32(
10821            signed_overflow_after_Add32( ire_result, prod32, irt_regA ),
10822            condT
10823         );
10824
10825         DIP( "smlaw%c%s r%u, r%u, r%u, r%u\n",
10826              bitM ? 't' : 'b',
10827              nCC(conq), regD, regN, regM, regA );
10828         return True;
10829      }
10830      /* fall through */
10831    }
10832
10833    /* ------------------- sel<c> <Rd>,<Rn>,<Rm> -------------------- */
10834    /* fixme: fix up the test in v6media.c so that we can pass the ge
10835       flags as part of the test. */
10836    {
10837      UInt regD = 99, regN = 99, regM = 99;
10838      Bool gate = False;
10839
10840      if (isT) {
10841         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
10842            regN = INSNT0(3,0);
10843            regD = INSNT1(11,8);
10844            regM = INSNT1(3,0);
10845            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10846               gate = True;
10847         }
10848      } else {
10849         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
10850             INSNA(11,8)  == BITS4(1,1,1,1)         &&
10851             INSNA(7,4)   == BITS4(1,0,1,1)) {
10852            regD = INSNA(15,12);
10853            regN = INSNA(19,16);
10854            regM = INSNA(3,0);
10855            if (regD != 15 && regN != 15 && regM != 15)
10856               gate = True;
10857         }
10858      }
10859
10860      if (gate) {
10861         IRTemp irt_ge_flag0 = newTemp(Ity_I32);
10862         IRTemp irt_ge_flag1 = newTemp(Ity_I32);
10863         IRTemp irt_ge_flag2 = newTemp(Ity_I32);
10864         IRTemp irt_ge_flag3 = newTemp(Ity_I32);
10865
10866         assign( irt_ge_flag0, get_GEFLAG32(0) );
10867         assign( irt_ge_flag1, get_GEFLAG32(1) );
10868         assign( irt_ge_flag2, get_GEFLAG32(2) );
10869         assign( irt_ge_flag3, get_GEFLAG32(3) );
10870
10871         IRExpr* ire_ge_flag0_or
10872           = binop(Iop_Or32, mkexpr(irt_ge_flag0),
10873                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag0)));
10874         IRExpr* ire_ge_flag1_or
10875           = binop(Iop_Or32, mkexpr(irt_ge_flag1),
10876                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag1)));
10877         IRExpr* ire_ge_flag2_or
10878           = binop(Iop_Or32, mkexpr(irt_ge_flag2),
10879                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag2)));
10880         IRExpr* ire_ge_flag3_or
10881           = binop(Iop_Or32, mkexpr(irt_ge_flag3),
10882                   binop(Iop_Sub32, mkU32(0), mkexpr(irt_ge_flag3)));
10883
10884         IRExpr* ire_ge_flags
10885           = binop( Iop_Or32,
10886                    binop(Iop_Or32,
10887                          binop(Iop_And32,
10888                                binop(Iop_Sar32, ire_ge_flag0_or, mkU8(31)),
10889                                mkU32(0x000000ff)),
10890                          binop(Iop_And32,
10891                                binop(Iop_Sar32, ire_ge_flag1_or, mkU8(31)),
10892                                mkU32(0x0000ff00))),
10893                    binop(Iop_Or32,
10894                          binop(Iop_And32,
10895                                binop(Iop_Sar32, ire_ge_flag2_or, mkU8(31)),
10896                                mkU32(0x00ff0000)),
10897                          binop(Iop_And32,
10898                                binop(Iop_Sar32, ire_ge_flag3_or, mkU8(31)),
10899                                mkU32(0xff000000))) );
10900
10901         IRExpr* ire_result
10902           = binop(Iop_Or32,
10903                   binop(Iop_And32,
10904                         isT ? getIRegT(regN) : getIRegA(regN),
10905                         ire_ge_flags ),
10906                   binop(Iop_And32,
10907                         isT ? getIRegT(regM) : getIRegA(regM),
10908                         unop(Iop_Not32, ire_ge_flags)));
10909
10910         if (isT)
10911            putIRegT( regD, ire_result, condT );
10912         else
10913            putIRegA( regD, ire_result, condT, Ijk_Boring );
10914
10915         DIP("sel%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
10916         return True;
10917      }
10918      /* fall through */
10919    }
10920
10921    /* ----------------- uxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
10922    {
10923      UInt regD = 99, regN = 99, regM = 99, rotate = 99;
10924      Bool gate = False;
10925
10926      if (isT) {
10927         if (INSNT0(15,4) == 0xFA3 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
10928            regN   = INSNT0(3,0);
10929            regD   = INSNT1(11,8);
10930            regM   = INSNT1(3,0);
10931            rotate = INSNT1(5,4);
10932            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
10933               gate = True;
10934         }
10935      } else {
10936         if (INSNA(27,20) == BITS8(0,1,1,0,1,1,0,0) &&
10937             INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
10938            regD   = INSNA(15,12);
10939            regN   = INSNA(19,16);
10940            regM   = INSNA(3,0);
10941            rotate = INSNA(11,10);
10942            if (regD != 15 && regN != 15 && regM != 15)
10943              gate = True;
10944         }
10945      }
10946
10947      if (gate) {
10948         IRTemp irt_regN = newTemp(Ity_I32);
10949         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
10950
10951         IRTemp irt_regM = newTemp(Ity_I32);
10952         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
10953
10954         IRTemp irt_rot = newTemp(Ity_I32);
10955         assign( irt_rot, binop(Iop_And32,
10956                                genROR32(irt_regM, 8 * rotate),
10957                                mkU32(0x00FF00FF)) );
10958
10959         IRExpr* resLo
10960            = binop(Iop_And32,
10961                    binop(Iop_Add32, mkexpr(irt_regN), mkexpr(irt_rot)),
10962                    mkU32(0x0000FFFF));
10963
10964         IRExpr* resHi
10965            = binop(Iop_Add32,
10966                    binop(Iop_And32, mkexpr(irt_regN), mkU32(0xFFFF0000)),
10967                    binop(Iop_And32, mkexpr(irt_rot),  mkU32(0xFFFF0000)));
10968
10969         IRExpr* ire_result
10970            = binop( Iop_Or32, resHi, resLo );
10971
10972         if (isT)
10973            putIRegT( regD, ire_result, condT );
10974         else
10975            putIRegA( regD, ire_result, condT, Ijk_Boring );
10976
10977         DIP( "uxtab16%s r%u, r%u, r%u, ROR #%u\n",
10978              nCC(conq), regD, regN, regM, 8 * rotate );
10979         return True;
10980      }
10981      /* fall through */
10982    }
10983
10984    /* --------------- usad8  Rd,Rn,Rm    ---------------- */
10985    /* --------------- usada8 Rd,Rn,Rm,Ra ---------------- */
10986    {
10987      UInt rD = 99, rN = 99, rM = 99, rA = 99;
10988      Bool gate = False;
10989
10990      if (isT) {
10991        if (INSNT0(15,4) == 0xFB7 && INSNT1(7,4) == BITS4(0,0,0,0)) {
10992            rN = INSNT0(3,0);
10993            rA = INSNT1(15,12);
10994            rD = INSNT1(11,8);
10995            rM = INSNT1(3,0);
10996            if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && rA != 13)
10997               gate = True;
10998         }
10999      } else {
11000         if (INSNA(27,20) == BITS8(0,1,1,1,1,0,0,0) &&
11001             INSNA(7,4)   == BITS4(0,0,0,1) ) {
11002            rD = INSNA(19,16);
11003            rA = INSNA(15,12);
11004            rM = INSNA(11,8);
11005            rN = INSNA(3,0);
11006            if (rD != 15 && rN != 15 && rM != 15 /* but rA can be 15 */)
11007               gate = True;
11008         }
11009      }
11010      /* We allow rA == 15, to denote the usad8 (no accumulator) case. */
11011
11012      if (gate) {
11013         IRExpr* rNe = isT ? getIRegT(rN) : getIRegA(rN);
11014         IRExpr* rMe = isT ? getIRegT(rM) : getIRegA(rM);
11015         IRExpr* rAe = rA == 15 ? mkU32(0)
11016                                : (isT ? getIRegT(rA) : getIRegA(rA));
11017         IRExpr* res = binop(Iop_Add32,
11018                             binop(Iop_Sad8Ux4, rNe, rMe),
11019                             rAe);
11020         if (isT)
11021            putIRegT( rD, res, condT );
11022         else
11023            putIRegA( rD, res, condT, Ijk_Boring );
11024
11025         if (rA == 15) {
11026            DIP( "usad8%s r%u, r%u, r%u\n",
11027                 nCC(conq), rD, rN, rM );
11028         } else {
11029            DIP( "usada8%s r%u, r%u, r%u, r%u\n",
11030                 nCC(conq), rD, rN, rM, rA );
11031         }
11032         return True;
11033      }
11034      /* fall through */
11035    }
11036
11037    /* ------------------ qadd<c> <Rd>,<Rn>,<Rm> ------------------- */
11038    {
11039      UInt regD = 99, regN = 99, regM = 99;
11040      Bool gate = False;
11041
11042      if (isT) {
11043         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF080) {
11044            regN = INSNT0(3,0);
11045            regD = INSNT1(11,8);
11046            regM = INSNT1(3,0);
11047            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11048               gate = True;
11049         }
11050      } else {
11051         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,0,0) &&
11052             INSNA(11,8)  == BITS4(0,0,0,0)         &&
11053             INSNA(7,4)   == BITS4(0,1,0,1)) {
11054            regD = INSNA(15,12);
11055            regN = INSNA(19,16);
11056            regM = INSNA(3,0);
11057            if (regD != 15 && regN != 15 && regM != 15)
11058               gate = True;
11059         }
11060      }
11061
11062      if (gate) {
11063         IRTemp rNt   = newTemp(Ity_I32);
11064         IRTemp rMt   = newTemp(Ity_I32);
11065         IRTemp res_q = newTemp(Ity_I32);
11066
11067         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11068         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11069
11070         assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rNt)));
11071         if (isT)
11072            putIRegT( regD, mkexpr(res_q), condT );
11073         else
11074            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11075
11076         or_into_QFLAG32(
11077            signed_overflow_after_Add32(
11078               binop(Iop_Add32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11079            condT
11080         );
11081
11082         DIP("qadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11083         return True;
11084      }
11085      /* fall through */
11086    }
11087
11088    /* ------------------ qdadd<c> <Rd>,<Rm>,<Rn> ------------------- */
11089    {
11090      UInt regD = 99, regN = 99, regM = 99;
11091      Bool gate = False;
11092
11093      if (isT) {
11094         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF090) {
11095            regN = INSNT0(3,0);
11096            regD = INSNT1(11,8);
11097            regM = INSNT1(3,0);
11098            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11099               gate = True;
11100         }
11101      } else {
11102         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,0,0) &&
11103             INSNA(11,8)  == BITS4(0,0,0,0)         &&
11104             INSNA(7,4)   == BITS4(0,1,0,1)) {
11105            regD = INSNA(15,12);
11106            regN = INSNA(19,16);
11107            regM = INSNA(3,0);
11108            if (regD != 15 && regN != 15 && regM != 15)
11109               gate = True;
11110         }
11111      }
11112
11113      if (gate) {
11114         IRTemp rNt   = newTemp(Ity_I32);
11115         IRTemp rMt   = newTemp(Ity_I32);
11116         IRTemp rN_d  = newTemp(Ity_I32);
11117         IRTemp res_q = newTemp(Ity_I32);
11118
11119         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11120         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11121
11122         or_into_QFLAG32(
11123            signed_overflow_after_Add32(
11124               binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11125            condT
11126         );
11127
11128         assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11129         assign(res_q, binop(Iop_QAdd32S, mkexpr(rMt), mkexpr(rN_d)));
11130         if (isT)
11131            putIRegT( regD, mkexpr(res_q), condT );
11132         else
11133            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11134
11135         or_into_QFLAG32(
11136            signed_overflow_after_Add32(
11137               binop(Iop_Add32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11138            condT
11139         );
11140
11141         DIP("qdadd%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11142         return True;
11143      }
11144      /* fall through */
11145    }
11146
11147    /* ------------------ qsub<c> <Rd>,<Rn>,<Rm> ------------------- */
11148    {
11149      UInt regD = 99, regN = 99, regM = 99;
11150      Bool gate = False;
11151
11152      if (isT) {
11153         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0A0) {
11154            regN = INSNT0(3,0);
11155            regD = INSNT1(11,8);
11156            regM = INSNT1(3,0);
11157            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11158               gate = True;
11159         }
11160      } else {
11161         if (INSNA(27,20) == BITS8(0,0,0,1,0,0,1,0) &&
11162             INSNA(11,8)  == BITS4(0,0,0,0)         &&
11163             INSNA(7,4)   == BITS4(0,1,0,1)) {
11164            regD = INSNA(15,12);
11165            regN = INSNA(19,16);
11166            regM = INSNA(3,0);
11167            if (regD != 15 && regN != 15 && regM != 15)
11168               gate = True;
11169         }
11170      }
11171
11172      if (gate) {
11173         IRTemp rNt   = newTemp(Ity_I32);
11174         IRTemp rMt   = newTemp(Ity_I32);
11175         IRTemp res_q = newTemp(Ity_I32);
11176
11177         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11178         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11179
11180         assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rNt)));
11181         if (isT)
11182            putIRegT( regD, mkexpr(res_q), condT );
11183         else
11184            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11185
11186         or_into_QFLAG32(
11187            signed_overflow_after_Sub32(
11188               binop(Iop_Sub32, mkexpr(rMt), mkexpr(rNt)), rMt, rNt),
11189            condT
11190         );
11191
11192         DIP("qsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11193         return True;
11194      }
11195      /* fall through */
11196    }
11197
11198    /* ------------------ qdsub<c> <Rd>,<Rm>,<Rn> ------------------- */
11199    {
11200      UInt regD = 99, regN = 99, regM = 99;
11201      Bool gate = False;
11202
11203      if (isT) {
11204         if (INSNT0(15,4) == 0xFA8 && (INSNT1(15,0) & 0xF0F0) == 0xF0B0) {
11205            regN = INSNT0(3,0);
11206            regD = INSNT1(11,8);
11207            regM = INSNT1(3,0);
11208            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11209               gate = True;
11210         }
11211      } else {
11212         if (INSNA(27,20) == BITS8(0,0,0,1,0,1,1,0) &&
11213             INSNA(11,8)  == BITS4(0,0,0,0)         &&
11214             INSNA(7,4)   == BITS4(0,1,0,1)) {
11215            regD = INSNA(15,12);
11216            regN = INSNA(19,16);
11217            regM = INSNA(3,0);
11218            if (regD != 15 && regN != 15 && regM != 15)
11219               gate = True;
11220         }
11221      }
11222
11223      if (gate) {
11224         IRTemp rNt   = newTemp(Ity_I32);
11225         IRTemp rMt   = newTemp(Ity_I32);
11226         IRTemp rN_d  = newTemp(Ity_I32);
11227         IRTemp res_q = newTemp(Ity_I32);
11228
11229         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11230         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11231
11232         or_into_QFLAG32(
11233            signed_overflow_after_Add32(
11234               binop(Iop_Add32, mkexpr(rNt), mkexpr(rNt)), rNt, rNt),
11235            condT
11236         );
11237
11238         assign(rN_d,  binop(Iop_QAdd32S, mkexpr(rNt), mkexpr(rNt)));
11239         assign(res_q, binop(Iop_QSub32S, mkexpr(rMt), mkexpr(rN_d)));
11240         if (isT)
11241            putIRegT( regD, mkexpr(res_q), condT );
11242         else
11243            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11244
11245         or_into_QFLAG32(
11246            signed_overflow_after_Sub32(
11247               binop(Iop_Sub32, mkexpr(rMt), mkexpr(rN_d)), rMt, rN_d),
11248            condT
11249         );
11250
11251         DIP("qdsub%s r%u, r%u, r%u\n", nCC(conq),regD,regM,regN);
11252         return True;
11253      }
11254      /* fall through */
11255    }
11256
11257    /* ------------------ uqsub16<c> <Rd>,<Rn>,<Rm> ------------------ */
11258    {
11259      UInt regD = 99, regN = 99, regM = 99;
11260      Bool gate = False;
11261
11262      if (isT) {
11263         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11264            regN = INSNT0(3,0);
11265            regD = INSNT1(11,8);
11266            regM = INSNT1(3,0);
11267            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11268               gate = True;
11269         }
11270      } else {
11271         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11272             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11273             INSNA(7,4)   == BITS4(0,1,1,1)) {
11274            regD = INSNA(15,12);
11275            regN = INSNA(19,16);
11276            regM = INSNA(3,0);
11277            if (regD != 15 && regN != 15 && regM != 15)
11278              gate = True;
11279         }
11280      }
11281
11282      if (gate) {
11283         IRTemp rNt   = newTemp(Ity_I32);
11284         IRTemp rMt   = newTemp(Ity_I32);
11285         IRTemp res_q = newTemp(Ity_I32);
11286
11287         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11288         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11289
11290         assign(res_q, binop(Iop_QSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11291         if (isT)
11292            putIRegT( regD, mkexpr(res_q), condT );
11293         else
11294            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11295
11296         DIP("uqsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11297         return True;
11298      }
11299      /* fall through */
11300    }
11301
11302    /* ----------------- shadd16<c> <Rd>,<Rn>,<Rm> ------------------- */
11303    {
11304      UInt regD = 99, regN = 99, regM = 99;
11305      Bool gate = False;
11306
11307      if (isT) {
11308         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11309            regN = INSNT0(3,0);
11310            regD = INSNT1(11,8);
11311            regM = INSNT1(3,0);
11312            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11313               gate = True;
11314         }
11315      } else {
11316         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11317             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11318             INSNA(7,4)   == BITS4(0,0,0,1)) {
11319            regD = INSNA(15,12);
11320            regN = INSNA(19,16);
11321            regM = INSNA(3,0);
11322            if (regD != 15 && regN != 15 && regM != 15)
11323               gate = True;
11324         }
11325      }
11326
11327      if (gate) {
11328         IRTemp rNt   = newTemp(Ity_I32);
11329         IRTemp rMt   = newTemp(Ity_I32);
11330         IRTemp res_q = newTemp(Ity_I32);
11331
11332         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11333         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11334
11335         assign(res_q, binop(Iop_HAdd16Sx2, mkexpr(rNt), mkexpr(rMt)));
11336         if (isT)
11337            putIRegT( regD, mkexpr(res_q), condT );
11338         else
11339            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11340
11341         DIP("shadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11342         return True;
11343      }
11344      /* fall through */
11345    }
11346
11347    /* ----------------- uhsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11348    {
11349      UInt regD = 99, regN = 99, regM = 99;
11350      Bool gate = False;
11351
11352      if (isT) {
11353         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11354            regN = INSNT0(3,0);
11355            regD = INSNT1(11,8);
11356            regM = INSNT1(3,0);
11357            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11358               gate = True;
11359         }
11360      } else {
11361         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11362             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11363             INSNA(7,4)   == BITS4(1,1,1,1)) {
11364            regD = INSNA(15,12);
11365            regN = INSNA(19,16);
11366            regM = INSNA(3,0);
11367            if (regD != 15 && regN != 15 && regM != 15)
11368               gate = True;
11369         }
11370      }
11371
11372      if (gate) {
11373         IRTemp rNt   = newTemp(Ity_I32);
11374         IRTemp rMt   = newTemp(Ity_I32);
11375         IRTemp res_q = newTemp(Ity_I32);
11376
11377         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11378         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11379
11380         assign(res_q, binop(Iop_HSub8Ux4, mkexpr(rNt), mkexpr(rMt)));
11381         if (isT)
11382            putIRegT( regD, mkexpr(res_q), condT );
11383         else
11384            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11385
11386         DIP("uhsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11387         return True;
11388      }
11389      /* fall through */
11390    }
11391
11392    /* ----------------- uhsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
11393    {
11394      UInt regD = 99, regN = 99, regM = 99;
11395      Bool gate = False;
11396
11397      if (isT) {
11398         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
11399            regN = INSNT0(3,0);
11400            regD = INSNT1(11,8);
11401            regM = INSNT1(3,0);
11402            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11403               gate = True;
11404         }
11405      } else {
11406         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
11407             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11408             INSNA(7,4)   == BITS4(0,1,1,1)) {
11409            regD = INSNA(15,12);
11410            regN = INSNA(19,16);
11411            regM = INSNA(3,0);
11412            if (regD != 15 && regN != 15 && regM != 15)
11413               gate = True;
11414         }
11415      }
11416
11417      if (gate) {
11418         IRTemp rNt   = newTemp(Ity_I32);
11419         IRTemp rMt   = newTemp(Ity_I32);
11420         IRTemp res_q = newTemp(Ity_I32);
11421
11422         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11423         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11424
11425         assign(res_q, binop(Iop_HSub16Ux2, mkexpr(rNt), mkexpr(rMt)));
11426         if (isT)
11427            putIRegT( regD, mkexpr(res_q), condT );
11428         else
11429            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11430
11431         DIP("uhsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11432         return True;
11433      }
11434      /* fall through */
11435    }
11436
11437    /* ------------------ uqadd16<c> <Rd>,<Rn>,<Rm> ------------------ */
11438    {
11439      UInt regD = 99, regN = 99, regM = 99;
11440      Bool gate = False;
11441
11442      if (isT) {
11443         if (INSNT0(15,4) == 0xFA9 && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11444            regN = INSNT0(3,0);
11445            regD = INSNT1(11,8);
11446            regM = INSNT1(3,0);
11447            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11448               gate = True;
11449         }
11450      } else {
11451         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11452             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11453             INSNA(7,4)   == BITS4(0,0,0,1)) {
11454            regD = INSNA(15,12);
11455            regN = INSNA(19,16);
11456            regM = INSNA(3,0);
11457            if (regD != 15 && regN != 15 && regM != 15)
11458               gate = True;
11459         }
11460      }
11461
11462      if (gate) {
11463         IRTemp rNt   = newTemp(Ity_I32);
11464         IRTemp rMt   = newTemp(Ity_I32);
11465         IRTemp res_q = newTemp(Ity_I32);
11466
11467         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11468         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11469
11470         assign(res_q, binop(Iop_QAdd16Ux2, mkexpr(rNt), mkexpr(rMt)));
11471         if (isT)
11472            putIRegT( regD, mkexpr(res_q), condT );
11473         else
11474            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11475
11476         DIP("uqadd16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11477         return True;
11478      }
11479      /* fall through */
11480    }
11481
11482    /* ------------------- uqsax<c> <Rd>,<Rn>,<Rm> ------------------- */
11483    {
11484      UInt regD = 99, regN = 99, regM = 99;
11485      Bool gate = False;
11486
11487      if (isT) {
11488         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11489            regN = INSNT0(3,0);
11490            regD = INSNT1(11,8);
11491            regM = INSNT1(3,0);
11492            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11493               gate = True;
11494         }
11495      } else {
11496         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11497             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11498             INSNA(7,4)   == BITS4(0,1,0,1)) {
11499            regD = INSNA(15,12);
11500            regN = INSNA(19,16);
11501            regM = INSNA(3,0);
11502            if (regD != 15 && regN != 15 && regM != 15)
11503               gate = True;
11504         }
11505      }
11506
11507      if (gate) {
11508         IRTemp irt_regN     = newTemp(Ity_I32);
11509         IRTemp irt_regM     = newTemp(Ity_I32);
11510         IRTemp irt_sum      = newTemp(Ity_I32);
11511         IRTemp irt_diff     = newTemp(Ity_I32);
11512         IRTemp irt_sum_res  = newTemp(Ity_I32);
11513         IRTemp irt_diff_res = newTemp(Ity_I32);
11514
11515         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11516         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11517
11518         assign( irt_diff,
11519                 binop( Iop_Sub32,
11520                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11521                        binop( Iop_Shr32,
11522                               binop(Iop_Shl32, mkexpr(irt_regM), mkU8(16)),
11523                               mkU8(16) ) ) );
11524         armUnsignedSatQ( &irt_diff_res, NULL, irt_diff, 0x10);
11525
11526         assign( irt_sum,
11527                 binop( Iop_Add32,
11528                        binop( Iop_Shr32,
11529                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11530                               mkU8(16) ),
11531                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) )) );
11532         armUnsignedSatQ( &irt_sum_res, NULL, irt_sum, 0x10 );
11533
11534         IRExpr* ire_result = binop( Iop_Or32,
11535                                     binop( Iop_Shl32, mkexpr(irt_diff_res),
11536                                            mkU8(16) ),
11537                                     binop( Iop_And32, mkexpr(irt_sum_res),
11538                                            mkU32(0xFFFF)) );
11539
11540         if (isT)
11541            putIRegT( regD, ire_result, condT );
11542         else
11543            putIRegA( regD, ire_result, condT, Ijk_Boring );
11544
11545         DIP( "uqsax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11546         return True;
11547      }
11548      /* fall through */
11549    }
11550
11551    /* ------------------- uqasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11552    {
11553      UInt regD = 99, regN = 99, regM = 99;
11554      Bool gate = False;
11555
11556      if (isT) {
11557         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF050) {
11558            regN = INSNT0(3,0);
11559            regD = INSNT1(11,8);
11560            regM = INSNT1(3,0);
11561            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11562               gate = True;
11563         }
11564      } else {
11565         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,0) &&
11566             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11567             INSNA(7,4)   == BITS4(0,0,1,1)) {
11568            regD = INSNA(15,12);
11569            regN = INSNA(19,16);
11570            regM = INSNA(3,0);
11571            if (regD != 15 && regN != 15 && regM != 15)
11572               gate = True;
11573         }
11574      }
11575
11576      if (gate) {
11577         IRTemp irt_regN     = newTemp(Ity_I32);
11578         IRTemp irt_regM     = newTemp(Ity_I32);
11579         IRTemp irt_sum      = newTemp(Ity_I32);
11580         IRTemp irt_diff     = newTemp(Ity_I32);
11581         IRTemp irt_res_sum  = newTemp(Ity_I32);
11582         IRTemp irt_res_diff = newTemp(Ity_I32);
11583
11584         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11585         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11586
11587         assign( irt_diff,
11588                 binop( Iop_Sub32,
11589                        binop( Iop_Shr32,
11590                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11591                               mkU8(16) ),
11592                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11593         armUnsignedSatQ( &irt_res_diff, NULL, irt_diff, 0x10 );
11594
11595         assign( irt_sum,
11596                 binop( Iop_Add32,
11597                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11598                        binop( Iop_Shr32,
11599                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11600                               mkU8(16) ) ) );
11601         armUnsignedSatQ( &irt_res_sum, NULL, irt_sum, 0x10 );
11602
11603         IRExpr* ire_result
11604           = binop( Iop_Or32,
11605                    binop( Iop_Shl32, mkexpr(irt_res_sum), mkU8(16) ),
11606                    binop( Iop_And32, mkexpr(irt_res_diff), mkU32(0xFFFF) ) );
11607
11608         if (isT)
11609            putIRegT( regD, ire_result, condT );
11610         else
11611            putIRegA( regD, ire_result, condT, Ijk_Boring );
11612
11613         DIP( "uqasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11614         return True;
11615      }
11616      /* fall through */
11617    }
11618
11619    /* ------------------- usax<c> <Rd>,<Rn>,<Rm> ------------------- */
11620    {
11621      UInt regD = 99, regN = 99, regM = 99;
11622      Bool gate = False;
11623
11624      if (isT) {
11625         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11626            regN = INSNT0(3,0);
11627            regD = INSNT1(11,8);
11628            regM = INSNT1(3,0);
11629            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11630               gate = True;
11631         }
11632      } else {
11633         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11634             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11635             INSNA(7,4)   == BITS4(0,1,0,1)) {
11636            regD = INSNA(15,12);
11637            regN = INSNA(19,16);
11638            regM = INSNA(3,0);
11639            if (regD != 15 && regN != 15 && regM != 15)
11640               gate = True;
11641         }
11642      }
11643
11644      if (gate) {
11645         IRTemp irt_regN = newTemp(Ity_I32);
11646         IRTemp irt_regM = newTemp(Ity_I32);
11647         IRTemp irt_sum  = newTemp(Ity_I32);
11648         IRTemp irt_diff = newTemp(Ity_I32);
11649
11650         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11651         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11652
11653         assign( irt_sum,
11654                 binop( Iop_Add32,
11655                        unop( Iop_16Uto32,
11656                              unop( Iop_32to16, mkexpr(irt_regN) )
11657                        ),
11658                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11659
11660         assign( irt_diff,
11661                 binop( Iop_Sub32,
11662                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11663                        unop( Iop_16Uto32,
11664                              unop( Iop_32to16, mkexpr(irt_regM) )
11665                        )
11666                 )
11667         );
11668
11669         IRExpr* ire_result
11670           = binop( Iop_Or32,
11671                    binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11672                    binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11673
11674         IRTemp ge10 = newTemp(Ity_I32);
11675         assign( ge10, IRExpr_ITE( binop( Iop_CmpLE32U,
11676                                          mkU32(0x10000), mkexpr(irt_sum) ),
11677                                   mkU32(1), mkU32(0) ) );
11678         put_GEFLAG32( 0, 0, mkexpr(ge10), condT );
11679         put_GEFLAG32( 1, 0, mkexpr(ge10), condT );
11680
11681         IRTemp ge32 = newTemp(Ity_I32);
11682         assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11683         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11684         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11685
11686         if (isT)
11687            putIRegT( regD, ire_result, condT );
11688         else
11689            putIRegA( regD, ire_result, condT, Ijk_Boring );
11690
11691         DIP( "usax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11692         return True;
11693      }
11694      /* fall through */
11695    }
11696
11697    /* ------------------- uasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11698    {
11699      UInt regD = 99, regN = 99, regM = 99;
11700      Bool gate = False;
11701
11702      if (isT) {
11703         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF040) {
11704            regN = INSNT0(3,0);
11705            regD = INSNT1(11,8);
11706            regM = INSNT1(3,0);
11707            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11708               gate = True;
11709         }
11710      } else {
11711         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,0,1) &&
11712             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11713             INSNA(7,4)   == BITS4(0,0,1,1)) {
11714            regD = INSNA(15,12);
11715            regN = INSNA(19,16);
11716            regM = INSNA(3,0);
11717            if (regD != 15 && regN != 15 && regM != 15)
11718               gate = True;
11719         }
11720      }
11721
11722      if (gate) {
11723         IRTemp irt_regN = newTemp(Ity_I32);
11724         IRTemp irt_regM = newTemp(Ity_I32);
11725         IRTemp irt_sum  = newTemp(Ity_I32);
11726         IRTemp irt_diff = newTemp(Ity_I32);
11727
11728         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11729         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11730
11731         assign( irt_diff,
11732                 binop( Iop_Sub32,
11733                        unop( Iop_16Uto32,
11734                              unop( Iop_32to16, mkexpr(irt_regN) )
11735                        ),
11736                        binop( Iop_Shr32, mkexpr(irt_regM), mkU8(16) ) ) );
11737
11738         assign( irt_sum,
11739                 binop( Iop_Add32,
11740                        binop( Iop_Shr32, mkexpr(irt_regN), mkU8(16) ),
11741                        unop( Iop_16Uto32,
11742                              unop( Iop_32to16, mkexpr(irt_regM) )
11743                        ) ) );
11744
11745         IRExpr* ire_result
11746           = binop( Iop_Or32,
11747                    binop( Iop_Shl32, mkexpr(irt_sum), mkU8(16) ),
11748                    binop( Iop_And32, mkexpr(irt_diff), mkU32(0xFFFF) ) );
11749
11750         IRTemp ge10 = newTemp(Ity_I32);
11751         assign(ge10, unop(Iop_Not32, mkexpr(irt_diff)));
11752         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11753         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11754
11755         IRTemp ge32 = newTemp(Ity_I32);
11756         assign( ge32, IRExpr_ITE( binop( Iop_CmpLE32U,
11757                                          mkU32(0x10000), mkexpr(irt_sum) ),
11758                                   mkU32(1), mkU32(0) ) );
11759         put_GEFLAG32( 2, 0, mkexpr(ge32), condT );
11760         put_GEFLAG32( 3, 0, mkexpr(ge32), condT );
11761
11762         if (isT)
11763            putIRegT( regD, ire_result, condT );
11764         else
11765            putIRegA( regD, ire_result, condT, Ijk_Boring );
11766
11767         DIP( "uasx%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11768         return True;
11769      }
11770      /* fall through */
11771    }
11772
11773    /* ------------------- ssax<c> <Rd>,<Rn>,<Rm> ------------------- */
11774    {
11775      UInt regD = 99, regN = 99, regM = 99;
11776      Bool gate = False;
11777
11778      if (isT) {
11779         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF000) {
11780            regN = INSNT0(3,0);
11781            regD = INSNT1(11,8);
11782            regM = INSNT1(3,0);
11783            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11784               gate = True;
11785         }
11786      } else {
11787         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,0,1) &&
11788             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11789             INSNA(7,4)   == BITS4(0,1,0,1)) {
11790            regD = INSNA(15,12);
11791            regN = INSNA(19,16);
11792            regM = INSNA(3,0);
11793            if (regD != 15 && regN != 15 && regM != 15)
11794               gate = True;
11795         }
11796      }
11797
11798      if (gate) {
11799         IRTemp irt_regN = newTemp(Ity_I32);
11800         IRTemp irt_regM = newTemp(Ity_I32);
11801         IRTemp irt_sum  = newTemp(Ity_I32);
11802         IRTemp irt_diff = newTemp(Ity_I32);
11803
11804         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11805         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11806
11807         assign( irt_sum,
11808                 binop( Iop_Add32,
11809                        binop( Iop_Sar32,
11810                               binop( Iop_Shl32, mkexpr(irt_regN), mkU8(16) ),
11811                               mkU8(16) ),
11812                        binop( Iop_Sar32, mkexpr(irt_regM), mkU8(16) ) ) );
11813
11814         assign( irt_diff,
11815                 binop( Iop_Sub32,
11816                        binop( Iop_Sar32, mkexpr(irt_regN), mkU8(16) ),
11817                        binop( Iop_Sar32,
11818                               binop( Iop_Shl32, mkexpr(irt_regM), mkU8(16) ),
11819                               mkU8(16) ) ) );
11820
11821         IRExpr* ire_result
11822           = binop( Iop_Or32,
11823                    binop( Iop_Shl32, mkexpr(irt_diff), mkU8(16) ),
11824                    binop( Iop_And32, mkexpr(irt_sum), mkU32(0xFFFF) ) );
11825
11826         IRTemp ge10 = newTemp(Ity_I32);
11827         assign(ge10, unop(Iop_Not32, mkexpr(irt_sum)));
11828         put_GEFLAG32( 0, 31, mkexpr(ge10), condT );
11829         put_GEFLAG32( 1, 31, mkexpr(ge10), condT );
11830
11831         IRTemp ge32 = newTemp(Ity_I32);
11832         assign(ge32, unop(Iop_Not32, mkexpr(irt_diff)));
11833         put_GEFLAG32( 2, 31, mkexpr(ge32), condT );
11834         put_GEFLAG32( 3, 31, mkexpr(ge32), condT );
11835
11836         if (isT)
11837            putIRegT( regD, ire_result, condT );
11838         else
11839            putIRegA( regD, ire_result, condT, Ijk_Boring );
11840
11841         DIP( "ssax%s r%u, r%u, r%u\n", nCC(conq), regD, regN, regM );
11842         return True;
11843      }
11844      /* fall through */
11845    }
11846
11847    /* ----------------- shsub8<c> <Rd>,<Rn>,<Rm> ------------------- */
11848    {
11849      UInt regD = 99, regN = 99, regM = 99;
11850      Bool gate = False;
11851
11852      if (isT) {
11853         if (INSNT0(15,4) == 0xFAC && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11854            regN = INSNT0(3,0);
11855            regD = INSNT1(11,8);
11856            regM = INSNT1(3,0);
11857            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11858               gate = True;
11859         }
11860      } else {
11861         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11862             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11863             INSNA(7,4)   == BITS4(1,1,1,1)) {
11864            regD = INSNA(15,12);
11865            regN = INSNA(19,16);
11866            regM = INSNA(3,0);
11867            if (regD != 15 && regN != 15 && regM != 15)
11868               gate = True;
11869         }
11870      }
11871
11872      if (gate) {
11873         IRTemp rNt   = newTemp(Ity_I32);
11874         IRTemp rMt   = newTemp(Ity_I32);
11875         IRTemp res_q = newTemp(Ity_I32);
11876
11877         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
11878         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
11879
11880         assign(res_q, binop(Iop_HSub8Sx4, mkexpr(rNt), mkexpr(rMt)));
11881         if (isT)
11882            putIRegT( regD, mkexpr(res_q), condT );
11883         else
11884            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
11885
11886         DIP("shsub8%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
11887         return True;
11888      }
11889      /* fall through */
11890    }
11891
11892    /* ----------------- sxtab16<c> Rd,Rn,Rm{,rot} ------------------ */
11893    {
11894      UInt regD = 99, regN = 99, regM = 99, rotate = 99;
11895      Bool gate = False;
11896
11897      if (isT) {
11898         if (INSNT0(15,4) == 0xFA2 && (INSNT1(15,0) & 0xF0C0) == 0xF080) {
11899            regN   = INSNT0(3,0);
11900            regD   = INSNT1(11,8);
11901            regM   = INSNT1(3,0);
11902            rotate = INSNT1(5,4);
11903            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11904               gate = True;
11905         }
11906      } else {
11907         if (INSNA(27,20) == BITS8(0,1,1,0,1,0,0,0) &&
11908             INSNA(9,4)   == BITS6(0,0,0,1,1,1) ) {
11909            regD   = INSNA(15,12);
11910            regN   = INSNA(19,16);
11911            regM   = INSNA(3,0);
11912            rotate = INSNA(11,10);
11913            if (regD != 15 && regN != 15 && regM != 15)
11914              gate = True;
11915         }
11916      }
11917
11918      if (gate) {
11919         IRTemp irt_regN = newTemp(Ity_I32);
11920         assign( irt_regN, isT ? getIRegT(regN) : getIRegA(regN) );
11921
11922         IRTemp irt_regM = newTemp(Ity_I32);
11923         assign( irt_regM, isT ? getIRegT(regM) : getIRegA(regM) );
11924
11925         IRTemp irt_rot = newTemp(Ity_I32);
11926         assign( irt_rot, genROR32(irt_regM, 8 * rotate) );
11927
11928         /* FIXME Maybe we can write this arithmetic in shorter form. */
11929         IRExpr* resLo
11930            = binop(Iop_And32,
11931                    binop(Iop_Add32,
11932                          mkexpr(irt_regN),
11933                          unop(Iop_16Uto32,
11934                               unop(Iop_8Sto16,
11935                                    unop(Iop_32to8, mkexpr(irt_rot))))),
11936                    mkU32(0x0000FFFF));
11937
11938         IRExpr* resHi
11939            = binop(Iop_And32,
11940                    binop(Iop_Add32,
11941                          mkexpr(irt_regN),
11942                          binop(Iop_Shl32,
11943                                unop(Iop_16Uto32,
11944                                     unop(Iop_8Sto16,
11945                                          unop(Iop_32to8,
11946                                               binop(Iop_Shr32,
11947                                                     mkexpr(irt_rot),
11948                                                     mkU8(16))))),
11949                                mkU8(16))),
11950                    mkU32(0xFFFF0000));
11951
11952         IRExpr* ire_result
11953            = binop( Iop_Or32, resHi, resLo );
11954
11955         if (isT)
11956            putIRegT( regD, ire_result, condT );
11957         else
11958            putIRegA( regD, ire_result, condT, Ijk_Boring );
11959
11960         DIP( "sxtab16%s r%u, r%u, r%u, ROR #%u\n",
11961              nCC(conq), regD, regN, regM, 8 * rotate );
11962         return True;
11963      }
11964      /* fall through */
11965    }
11966
11967    /* ----------------- shasx<c> <Rd>,<Rn>,<Rm> ------------------- */
11968    {
11969      UInt regD = 99, regN = 99, regM = 99;
11970      Bool gate = False;
11971
11972      if (isT) {
11973         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
11974            regN = INSNT0(3,0);
11975            regD = INSNT1(11,8);
11976            regM = INSNT1(3,0);
11977            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
11978               gate = True;
11979         }
11980      } else {
11981         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
11982             INSNA(11,8)  == BITS4(1,1,1,1)         &&
11983             INSNA(7,4)   == BITS4(0,0,1,1)) {
11984            regD = INSNA(15,12);
11985            regN = INSNA(19,16);
11986            regM = INSNA(3,0);
11987            if (regD != 15 && regN != 15 && regM != 15)
11988               gate = True;
11989         }
11990      }
11991
11992      if (gate) {
11993         IRTemp rNt   = newTemp(Ity_I32);
11994         IRTemp rMt   = newTemp(Ity_I32);
11995         IRTemp irt_diff  = newTemp(Ity_I32);
11996         IRTemp irt_sum   = newTemp(Ity_I32);
11997         IRTemp res_q = newTemp(Ity_I32);
11998
11999         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12000         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12001
12002         assign( irt_diff,
12003                 binop(Iop_Sub32,
12004                       unop(Iop_16Sto32,
12005                            unop(Iop_32to16,
12006                                 mkexpr(rNt)
12007                            )
12008                       ),
12009                       unop(Iop_16Sto32,
12010                            unop(Iop_32to16,
12011                                 binop(Iop_Shr32,
12012                                       mkexpr(rMt), mkU8(16)
12013                                 )
12014                            )
12015                       )
12016                 )
12017         );
12018
12019         assign( irt_sum,
12020                 binop(Iop_Add32,
12021                       unop(Iop_16Sto32,
12022                            unop(Iop_32to16,
12023                                 binop(Iop_Shr32,
12024                                       mkexpr(rNt), mkU8(16)
12025                                 )
12026                            )
12027                       ),
12028                       unop(Iop_16Sto32,
12029                            unop(Iop_32to16, mkexpr(rMt)
12030                            )
12031                       )
12032                 )
12033         );
12034
12035         assign( res_q,
12036                 binop(Iop_Or32,
12037                       unop(Iop_16Uto32,
12038                            unop(Iop_32to16,
12039                                 binop(Iop_Shr32,
12040                                       mkexpr(irt_diff), mkU8(1)
12041                                 )
12042                            )
12043                       ),
12044                       binop(Iop_Shl32,
12045                             binop(Iop_Shr32,
12046                                   mkexpr(irt_sum), mkU8(1)
12047                             ),
12048                             mkU8(16)
12049                      )
12050                 )
12051         );
12052
12053         if (isT)
12054            putIRegT( regD, mkexpr(res_q), condT );
12055         else
12056            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12057
12058         DIP("shasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12059         return True;
12060      }
12061      /* fall through */
12062    }
12063
12064    /* ----------------- uhasx<c> <Rd>,<Rn>,<Rm> ------------------- */
12065    {
12066      UInt regD = 99, regN = 99, regM = 99;
12067      Bool gate = False;
12068
12069      if (isT) {
12070         if (INSNT0(15,4) == 0xFAA && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12071            regN = INSNT0(3,0);
12072            regD = INSNT1(11,8);
12073            regM = INSNT1(3,0);
12074            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12075               gate = True;
12076         }
12077      } else {
12078         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12079             INSNA(11,8)  == BITS4(1,1,1,1)         &&
12080             INSNA(7,4)   == BITS4(0,0,1,1)) {
12081            regD = INSNA(15,12);
12082            regN = INSNA(19,16);
12083            regM = INSNA(3,0);
12084            if (regD != 15 && regN != 15 && regM != 15)
12085               gate = True;
12086         }
12087      }
12088
12089      if (gate) {
12090         IRTemp rNt   = newTemp(Ity_I32);
12091         IRTemp rMt   = newTemp(Ity_I32);
12092         IRTemp irt_diff  = newTemp(Ity_I32);
12093         IRTemp irt_sum   = newTemp(Ity_I32);
12094         IRTemp res_q = newTemp(Ity_I32);
12095
12096         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12097         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12098
12099         assign( irt_diff,
12100                 binop(Iop_Sub32,
12101                       unop(Iop_16Uto32,
12102                            unop(Iop_32to16,
12103                                 mkexpr(rNt)
12104                            )
12105                       ),
12106                       unop(Iop_16Uto32,
12107                            unop(Iop_32to16,
12108                                 binop(Iop_Shr32,
12109                                       mkexpr(rMt), mkU8(16)
12110                                 )
12111                            )
12112                       )
12113                 )
12114         );
12115
12116         assign( irt_sum,
12117                 binop(Iop_Add32,
12118                       unop(Iop_16Uto32,
12119                            unop(Iop_32to16,
12120                                 binop(Iop_Shr32,
12121                                       mkexpr(rNt), mkU8(16)
12122                                 )
12123                            )
12124                       ),
12125                       unop(Iop_16Uto32,
12126                            unop(Iop_32to16, mkexpr(rMt)
12127                            )
12128                       )
12129                 )
12130         );
12131
12132         assign( res_q,
12133                 binop(Iop_Or32,
12134                       unop(Iop_16Uto32,
12135                            unop(Iop_32to16,
12136                                 binop(Iop_Shr32,
12137                                       mkexpr(irt_diff), mkU8(1)
12138                                 )
12139                            )
12140                       ),
12141                       binop(Iop_Shl32,
12142                             binop(Iop_Shr32,
12143                                   mkexpr(irt_sum), mkU8(1)
12144                             ),
12145                             mkU8(16)
12146                      )
12147                 )
12148         );
12149
12150         if (isT)
12151            putIRegT( regD, mkexpr(res_q), condT );
12152         else
12153            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12154
12155         DIP("uhasx%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12156         return True;
12157      }
12158      /* fall through */
12159    }
12160
12161    /* ----------------- shsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12162    {
12163      UInt regD = 99, regN = 99, regM = 99;
12164      Bool gate = False;
12165
12166      if (isT) {
12167         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12168            regN = INSNT0(3,0);
12169            regD = INSNT1(11,8);
12170            regM = INSNT1(3,0);
12171            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12172               gate = True;
12173         }
12174      } else {
12175         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12176             INSNA(11,8)  == BITS4(1,1,1,1)         &&
12177             INSNA(7,4)   == BITS4(0,1,0,1)) {
12178            regD = INSNA(15,12);
12179            regN = INSNA(19,16);
12180            regM = INSNA(3,0);
12181            if (regD != 15 && regN != 15 && regM != 15)
12182               gate = True;
12183         }
12184      }
12185
12186      if (gate) {
12187         IRTemp rNt   = newTemp(Ity_I32);
12188         IRTemp rMt   = newTemp(Ity_I32);
12189         IRTemp irt_diff  = newTemp(Ity_I32);
12190         IRTemp irt_sum   = newTemp(Ity_I32);
12191         IRTemp res_q = newTemp(Ity_I32);
12192
12193         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12194         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12195
12196         assign( irt_sum,
12197                 binop(Iop_Add32,
12198                       unop(Iop_16Sto32,
12199                            unop(Iop_32to16,
12200                                 mkexpr(rNt)
12201                            )
12202                       ),
12203                       unop(Iop_16Sto32,
12204                            unop(Iop_32to16,
12205                                 binop(Iop_Shr32,
12206                                       mkexpr(rMt), mkU8(16)
12207                                 )
12208                            )
12209                       )
12210                 )
12211         );
12212
12213         assign( irt_diff,
12214                 binop(Iop_Sub32,
12215                       unop(Iop_16Sto32,
12216                            unop(Iop_32to16,
12217                                 binop(Iop_Shr32,
12218                                       mkexpr(rNt), mkU8(16)
12219                                 )
12220                            )
12221                       ),
12222                       unop(Iop_16Sto32,
12223                            unop(Iop_32to16, mkexpr(rMt)
12224                            )
12225                       )
12226                 )
12227         );
12228
12229         assign( res_q,
12230                 binop(Iop_Or32,
12231                       unop(Iop_16Uto32,
12232                            unop(Iop_32to16,
12233                                 binop(Iop_Shr32,
12234                                       mkexpr(irt_sum), mkU8(1)
12235                                 )
12236                            )
12237                       ),
12238                       binop(Iop_Shl32,
12239                             binop(Iop_Shr32,
12240                                   mkexpr(irt_diff), mkU8(1)
12241                             ),
12242                             mkU8(16)
12243                      )
12244                 )
12245         );
12246
12247         if (isT)
12248            putIRegT( regD, mkexpr(res_q), condT );
12249         else
12250            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12251
12252         DIP("shsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12253         return True;
12254      }
12255      /* fall through */
12256    }
12257
12258    /* ----------------- uhsax<c> <Rd>,<Rn>,<Rm> ------------------- */
12259    {
12260      UInt regD = 99, regN = 99, regM = 99;
12261      Bool gate = False;
12262
12263      if (isT) {
12264         if (INSNT0(15,4) == 0xFAE && (INSNT1(15,0) & 0xF0F0) == 0xF060) {
12265            regN = INSNT0(3,0);
12266            regD = INSNT1(11,8);
12267            regM = INSNT1(3,0);
12268            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12269               gate = True;
12270         }
12271      } else {
12272         if (INSNA(27,20) == BITS8(0,1,1,0,0,1,1,1) &&
12273             INSNA(11,8)  == BITS4(1,1,1,1)         &&
12274             INSNA(7,4)   == BITS4(0,1,0,1)) {
12275            regD = INSNA(15,12);
12276            regN = INSNA(19,16);
12277            regM = INSNA(3,0);
12278            if (regD != 15 && regN != 15 && regM != 15)
12279               gate = True;
12280         }
12281      }
12282
12283      if (gate) {
12284         IRTemp rNt   = newTemp(Ity_I32);
12285         IRTemp rMt   = newTemp(Ity_I32);
12286         IRTemp irt_diff  = newTemp(Ity_I32);
12287         IRTemp irt_sum   = newTemp(Ity_I32);
12288         IRTemp res_q = newTemp(Ity_I32);
12289
12290         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12291         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12292
12293         assign( irt_sum,
12294                 binop(Iop_Add32,
12295                       unop(Iop_16Uto32,
12296                            unop(Iop_32to16,
12297                                 mkexpr(rNt)
12298                            )
12299                       ),
12300                       unop(Iop_16Uto32,
12301                            unop(Iop_32to16,
12302                                 binop(Iop_Shr32,
12303                                       mkexpr(rMt), mkU8(16)
12304                                 )
12305                            )
12306                       )
12307                 )
12308         );
12309
12310         assign( irt_diff,
12311                 binop(Iop_Sub32,
12312                       unop(Iop_16Uto32,
12313                            unop(Iop_32to16,
12314                                 binop(Iop_Shr32,
12315                                       mkexpr(rNt), mkU8(16)
12316                                 )
12317                            )
12318                       ),
12319                       unop(Iop_16Uto32,
12320                            unop(Iop_32to16, mkexpr(rMt)
12321                            )
12322                       )
12323                 )
12324         );
12325
12326         assign( res_q,
12327                 binop(Iop_Or32,
12328                       unop(Iop_16Uto32,
12329                            unop(Iop_32to16,
12330                                 binop(Iop_Shr32,
12331                                       mkexpr(irt_sum), mkU8(1)
12332                                 )
12333                            )
12334                       ),
12335                       binop(Iop_Shl32,
12336                             binop(Iop_Shr32,
12337                                   mkexpr(irt_diff), mkU8(1)
12338                             ),
12339                             mkU8(16)
12340                      )
12341                 )
12342         );
12343
12344         if (isT)
12345            putIRegT( regD, mkexpr(res_q), condT );
12346         else
12347            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12348
12349         DIP("uhsax%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12350         return True;
12351      }
12352      /* fall through */
12353    }
12354
12355    /* ----------------- shsub16<c> <Rd>,<Rn>,<Rm> ------------------- */
12356    {
12357      UInt regD = 99, regN = 99, regM = 99;
12358      Bool gate = False;
12359
12360      if (isT) {
12361         if (INSNT0(15,4) == 0xFAD && (INSNT1(15,0) & 0xF0F0) == 0xF020) {
12362            regN = INSNT0(3,0);
12363            regD = INSNT1(11,8);
12364            regM = INSNT1(3,0);
12365            if (!isBadRegT(regD) && !isBadRegT(regN) && !isBadRegT(regM))
12366               gate = True;
12367         }
12368      } else {
12369         if (INSNA(27,20) == BITS8(0,1,1,0,0,0,1,1) &&
12370             INSNA(11,8)  == BITS4(1,1,1,1)         &&
12371             INSNA(7,4)   == BITS4(0,1,1,1)) {
12372            regD = INSNA(15,12);
12373            regN = INSNA(19,16);
12374            regM = INSNA(3,0);
12375            if (regD != 15 && regN != 15 && regM != 15)
12376               gate = True;
12377         }
12378      }
12379
12380      if (gate) {
12381         IRTemp rNt   = newTemp(Ity_I32);
12382         IRTemp rMt   = newTemp(Ity_I32);
12383         IRTemp res_q = newTemp(Ity_I32);
12384
12385         assign( rNt, isT ? getIRegT(regN) : getIRegA(regN) );
12386         assign( rMt, isT ? getIRegT(regM) : getIRegA(regM) );
12387
12388         assign(res_q, binop(Iop_HSub16Sx2, mkexpr(rNt), mkexpr(rMt)));
12389         if (isT)
12390            putIRegT( regD, mkexpr(res_q), condT );
12391         else
12392            putIRegA( regD, mkexpr(res_q), condT, Ijk_Boring );
12393
12394         DIP("shsub16%s r%u, r%u, r%u\n", nCC(conq),regD,regN,regM);
12395         return True;
12396      }
12397      /* fall through */
12398    }
12399
12400    /* ----------------- smmls{r}<c> <Rd>,<Rn>,<Rm>,<Ra> ------------------- */
12401    {
12402      UInt rD = 99, rN = 99, rM = 99, rA = 99;
12403      Bool round  = False;
12404      Bool gate   = False;
12405
12406      if (isT) {
12407         if (INSNT0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
12408             && INSNT0(6,4) == BITS3(1,1,0)
12409             && INSNT1(7,5) == BITS3(0,0,0)) {
12410            round = INSNT1(4,4);
12411            rA    = INSNT1(15,12);
12412            rD    = INSNT1(11,8);
12413            rM    = INSNT1(3,0);
12414            rN    = INSNT0(3,0);
12415            if (!isBadRegT(rD)
12416                && !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rA))
12417               gate = True;
12418         }
12419      } else {
12420         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,1)
12421             && INSNA(15,12) != BITS4(1,1,1,1)
12422             && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(1,1,0,1)) {
12423            round = INSNA(5,5);
12424            rD    = INSNA(19,16);
12425            rA    = INSNA(15,12);
12426            rM    = INSNA(11,8);
12427            rN    = INSNA(3,0);
12428            if (rD != 15 && rM != 15 && rN != 15)
12429               gate = True;
12430         }
12431      }
12432      if (gate) {
12433         IRTemp irt_rA   = newTemp(Ity_I32);
12434         IRTemp irt_rN   = newTemp(Ity_I32);
12435         IRTemp irt_rM   = newTemp(Ity_I32);
12436         assign( irt_rA, isT ? getIRegT(rA) : getIRegA(rA) );
12437         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12438         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12439         IRExpr* res
12440         = unop(Iop_64HIto32,
12441                binop(Iop_Add64,
12442                      binop(Iop_Sub64,
12443                            binop(Iop_32HLto64, mkexpr(irt_rA), mkU32(0)),
12444                            binop(Iop_MullS32, mkexpr(irt_rN), mkexpr(irt_rM))),
12445                      mkU64(round ? 0x80000000ULL : 0ULL)));
12446         if (isT)
12447            putIRegT( rD, res, condT );
12448         else
12449            putIRegA(rD, res, condT, Ijk_Boring);
12450         DIP("smmls%s%s r%u, r%u, r%u, r%u\n",
12451             round ? "r" : "", nCC(conq), rD, rN, rM, rA);
12452         return True;
12453      }
12454      /* fall through */
12455    }
12456
12457    /* -------------- smlald{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12458    {
12459      UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12460      Bool m_swap = False;
12461      Bool gate   = False;
12462
12463      if (isT) {
12464         if (INSNT0(15,4) == 0xFBC &&
12465             (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0)) {
12466            rN     = INSNT0(3,0);
12467            rDlo   = INSNT1(15,12);
12468            rDhi   = INSNT1(11,8);
12469            rM     = INSNT1(3,0);
12470            m_swap = (INSNT1(4,4) & 1) == 1;
12471            if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
12472                && !isBadRegT(rM) && rDhi != rDlo)
12473               gate = True;
12474         }
12475      } else {
12476         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0)
12477             && (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
12478            rN     = INSNA(3,0);
12479            rDlo   = INSNA(15,12);
12480            rDhi   = INSNA(19,16);
12481            rM     = INSNA(11,8);
12482            m_swap = ( INSNA(5,5) & 1 ) == 1;
12483            if (rDlo != 15 && rDhi != 15
12484                && rN != 15 && rM != 15 && rDlo != rDhi)
12485               gate = True;
12486         }
12487      }
12488
12489      if (gate) {
12490         IRTemp irt_rM   = newTemp(Ity_I32);
12491         IRTemp irt_rN   = newTemp(Ity_I32);
12492         IRTemp irt_rDhi = newTemp(Ity_I32);
12493         IRTemp irt_rDlo = newTemp(Ity_I32);
12494         IRTemp op_2     = newTemp(Ity_I32);
12495         IRTemp pr_1     = newTemp(Ity_I64);
12496         IRTemp pr_2     = newTemp(Ity_I64);
12497         IRTemp result   = newTemp(Ity_I64);
12498         IRTemp resHi    = newTemp(Ity_I32);
12499         IRTemp resLo    = newTemp(Ity_I32);
12500         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM));
12501         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN));
12502         assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi));
12503         assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo));
12504         assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12505         assign( pr_1, binop(Iop_MullS32,
12506                             unop(Iop_16Sto32,
12507                                  unop(Iop_32to16, mkexpr(irt_rN))
12508                             ),
12509                             unop(Iop_16Sto32,
12510                                  unop(Iop_32to16, mkexpr(op_2))
12511                             )
12512                       )
12513         );
12514         assign( pr_2, binop(Iop_MullS32,
12515                             binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12516                             binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12517                       )
12518         );
12519         assign( result, binop(Iop_Add64,
12520                               binop(Iop_Add64,
12521                                     mkexpr(pr_1),
12522                                     mkexpr(pr_2)
12523                               ),
12524                               binop(Iop_32HLto64,
12525                                     mkexpr(irt_rDhi),
12526                                     mkexpr(irt_rDlo)
12527                               )
12528                         )
12529         );
12530         assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12531         assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12532         if (isT) {
12533            putIRegT( rDhi, mkexpr(resHi), condT );
12534            putIRegT( rDlo, mkexpr(resLo), condT );
12535         } else {
12536            putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12537            putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12538         }
12539         DIP("smlald%c%s r%u, r%u, r%u, r%u\n",
12540             m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12541         return True;
12542      }
12543      /* fall through */
12544    }
12545
12546    /* -------------- smlsld{x}<c> <RdLo>,<RdHi>,<Rn>,<Rm> ---------------- */
12547    {
12548      UInt rN = 99, rDlo = 99, rDhi = 99, rM = 99;
12549      Bool m_swap = False;
12550      Bool gate   = False;
12551
12552      if (isT) {
12553         if ((INSNT0(15,4) == 0xFBD &&
12554             (INSNT1(7,4) & BITS4(1,1,1,0)) == BITS4(1,1,0,0))) {
12555            rN     = INSNT0(3,0);
12556            rDlo   = INSNT1(15,12);
12557            rDhi   = INSNT1(11,8);
12558            rM     = INSNT1(3,0);
12559            m_swap = (INSNT1(4,4) & 1) == 1;
12560            if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN) &&
12561                !isBadRegT(rM) && rDhi != rDlo)
12562               gate = True;
12563         }
12564      } else {
12565         if (INSNA(27,20) == BITS8(0,1,1,1,0,1,0,0) &&
12566             (INSNA(7,4) & BITS4(1,1,0,1)) == BITS4(0,1,0,1)) {
12567            rN     = INSNA(3,0);
12568            rDlo   = INSNA(15,12);
12569            rDhi   = INSNA(19,16);
12570            rM     = INSNA(11,8);
12571            m_swap = (INSNA(5,5) & 1) == 1;
12572            if (rDlo != 15 && rDhi != 15 &&
12573                rN != 15 && rM != 15 && rDlo != rDhi)
12574               gate = True;
12575         }
12576      }
12577      if (gate) {
12578         IRTemp irt_rM   = newTemp(Ity_I32);
12579         IRTemp irt_rN   = newTemp(Ity_I32);
12580         IRTemp irt_rDhi = newTemp(Ity_I32);
12581         IRTemp irt_rDlo = newTemp(Ity_I32);
12582         IRTemp op_2     = newTemp(Ity_I32);
12583         IRTemp pr_1     = newTemp(Ity_I64);
12584         IRTemp pr_2     = newTemp(Ity_I64);
12585         IRTemp result   = newTemp(Ity_I64);
12586         IRTemp resHi    = newTemp(Ity_I32);
12587         IRTemp resLo    = newTemp(Ity_I32);
12588         assign( irt_rM, isT ? getIRegT(rM) : getIRegA(rM) );
12589         assign( irt_rN, isT ? getIRegT(rN) : getIRegA(rN) );
12590         assign( irt_rDhi, isT ? getIRegT(rDhi) : getIRegA(rDhi) );
12591         assign( irt_rDlo, isT ? getIRegT(rDlo) : getIRegA(rDlo) );
12592         assign( op_2, genROR32(irt_rM, m_swap ? 16 : 0) );
12593         assign( pr_1, binop(Iop_MullS32,
12594                             unop(Iop_16Sto32,
12595                                  unop(Iop_32to16, mkexpr(irt_rN))
12596                             ),
12597                             unop(Iop_16Sto32,
12598                                  unop(Iop_32to16, mkexpr(op_2))
12599                             )
12600                       )
12601         );
12602         assign( pr_2, binop(Iop_MullS32,
12603                             binop(Iop_Sar32, mkexpr(irt_rN), mkU8(16)),
12604                             binop(Iop_Sar32, mkexpr(op_2), mkU8(16))
12605                       )
12606         );
12607         assign( result, binop(Iop_Add64,
12608                               binop(Iop_Sub64,
12609                                     mkexpr(pr_1),
12610                                     mkexpr(pr_2)
12611                               ),
12612                               binop(Iop_32HLto64,
12613                                     mkexpr(irt_rDhi),
12614                                     mkexpr(irt_rDlo)
12615                               )
12616                         )
12617         );
12618         assign( resHi, unop(Iop_64HIto32, mkexpr(result)) );
12619         assign( resLo, unop(Iop_64to32, mkexpr(result)) );
12620         if (isT) {
12621            putIRegT( rDhi, mkexpr(resHi), condT );
12622            putIRegT( rDlo, mkexpr(resLo), condT );
12623         } else {
12624            putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
12625            putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
12626         }
12627         DIP("smlsld%c%s r%u, r%u, r%u, r%u\n",
12628             m_swap ? 'x' : ' ', nCC(conq), rDlo, rDhi, rN, rM);
12629         return True;
12630      }
12631      /* fall through */
12632    }
12633
12634    /* ---------- Doesn't match anything. ---------- */
12635    return False;
12636
12637 #  undef INSNA
12638 #  undef INSNT0
12639 #  undef INSNT1
12640 }
12641
12642
12643 /*------------------------------------------------------------*/
12644 /*--- V8 instructions                                      ---*/
12645 /*------------------------------------------------------------*/
12646
12647 /* Break a V128-bit value up into four 32-bit ints. */
12648
12649 static void breakupV128to32s ( IRTemp t128,
12650                                /*OUTs*/
12651                                IRTemp* t3, IRTemp* t2,
12652                                IRTemp* t1, IRTemp* t0 )
12653 {
12654    IRTemp hi64 = newTemp(Ity_I64);
12655    IRTemp lo64 = newTemp(Ity_I64);
12656    assign( hi64, unop(Iop_V128HIto64, mkexpr(t128)) );
12657    assign( lo64, unop(Iop_V128to64,   mkexpr(t128)) );
12658
12659    vassert(t0 && *t0 == IRTemp_INVALID);
12660    vassert(t1 && *t1 == IRTemp_INVALID);
12661    vassert(t2 && *t2 == IRTemp_INVALID);
12662    vassert(t3 && *t3 == IRTemp_INVALID);
12663
12664    *t0 = newTemp(Ity_I32);
12665    *t1 = newTemp(Ity_I32);
12666    *t2 = newTemp(Ity_I32);
12667    *t3 = newTemp(Ity_I32);
12668    assign( *t0, unop(Iop_64to32,   mkexpr(lo64)) );
12669    assign( *t1, unop(Iop_64HIto32, mkexpr(lo64)) );
12670    assign( *t2, unop(Iop_64to32,   mkexpr(hi64)) );
12671    assign( *t3, unop(Iop_64HIto32, mkexpr(hi64)) );
12672 }
12673
12674
12675 /* Both ARM and Thumb */
12676
12677 /* Translate a V8 instruction.  If successful, returns True and *dres
12678    may or may not be updated.  If unsuccessful, returns False and
12679    doesn't change *dres nor create any IR.
12680
12681    The Thumb and ARM encodings are potentially different.  In both
12682    ARM and Thumb mode, the caller must pass the entire 32 bits of
12683    the instruction.  Callers may pass any instruction; this function
12684    ignores anything it doesn't recognise.
12685
12686    Caller must supply an IRTemp 'condT' holding the gating condition,
12687    or IRTemp_INVALID indicating the insn is always executed.
12688
12689    If we are decoding an ARM instruction which is in the NV space
12690    then it is expected that condT will be IRTemp_INVALID, and that is
12691    asserted for.  That condition is ensured by the logic near the top
12692    of disInstr_ARM_WRK, that sets up condT.
12693
12694    When decoding for Thumb, the caller must pass the ITState pre/post
12695    this instruction, so that we can generate a SIGILL in the cases where
12696    the instruction may not be in an IT block.  When decoding for ARM,
12697    both of these must be IRTemp_INVALID.
12698
12699    Finally, the caller must indicate whether this occurs in ARM or in
12700    Thumb code.
12701 */
12702 static Bool decode_V8_instruction (
12703                /*MOD*/DisResult* dres,
12704                UInt              insnv8,
12705                IRTemp            condT,
12706                Bool              isT,
12707                IRTemp            old_itstate,
12708                IRTemp            new_itstate
12709             )
12710 {
12711 #  define INSN(_bMax,_bMin)   SLICE_UInt(insnv8, (_bMax), (_bMin))
12712
12713    if (isT) {
12714       vassert(old_itstate != IRTemp_INVALID);
12715       vassert(new_itstate != IRTemp_INVALID);
12716    } else {
12717       vassert(old_itstate == IRTemp_INVALID);
12718       vassert(new_itstate == IRTemp_INVALID);
12719    }
12720
12721    /* ARMCondcode 'conq' is only used for debug printing and for no other
12722       purpose.  For ARM, this is simply the top 4 bits of the instruction.
12723       For Thumb, the condition is not (really) known until run time, and so
12724       we set it to ARMCondAL in order that printing of these instructions
12725       does not show any condition. */
12726    ARMCondcode conq;
12727    if (isT) {
12728       conq = ARMCondAL;
12729    } else {
12730       conq = (ARMCondcode)INSN(31,28);
12731       if (conq == ARMCondNV || conq == ARMCondAL) {
12732          vassert(condT == IRTemp_INVALID);
12733       } else {
12734          vassert(condT != IRTemp_INVALID);
12735       }
12736       vassert(conq >= ARMCondEQ && conq <= ARMCondNV);
12737    }
12738
12739    /* ----------- {AESD, AESE, AESMC, AESIMC}.8 q_q ----------- */
12740    /*     31   27   23  21 19 17 15 11   7      3
12741       T1: 1111 1111 1 D 11 sz 00 d  0011 00 M 0 m  AESE Qd, Qm
12742       A1: 1111 0011 1 D 11 sz 00 d  0011 00 M 0 m  AESE Qd, Qm
12743
12744       T1: 1111 1111 1 D 11 sz 00 d  0011 01 M 0 m  AESD Qd, Qm
12745       A1: 1111 0011 1 D 11 sz 00 d  0011 01 M 0 m  AESD Qd, Qm
12746
12747       T1: 1111 1111 1 D 11 sz 00 d  0011 10 M 0 m  AESMC Qd, Qm
12748       A1: 1111 0011 1 D 11 sz 00 d  0011 10 M 0 m  AESMC Qd, Qm
12749
12750       T1: 1111 1111 1 D 11 sz 00 d  0011 11 M 0 m  AESIMC Qd, Qm
12751       A1: 1111 0011 1 D 11 sz 00 d  0011 11 M 0 m  AESIMC Qd, Qm
12752
12753       sz must be 00
12754       ARM encoding is in NV space.
12755       In Thumb mode, we must not be in an IT block.
12756    */
12757    {
12758      UInt regD = 99, regM = 99, opc = 4/*invalid*/;
12759      Bool gate = True;
12760
12761      UInt high9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
12762      if (INSN(31,23) == high9 && INSN(21,16) == BITS6(1,1,0,0,0,0)
12763          && INSN(11,8) == BITS4(0,0,1,1) && INSN(4,4) == 0) {
12764         UInt bitD = INSN(22,22);
12765         UInt fldD = INSN(15,12);
12766         UInt bitM = INSN(5,5);
12767         UInt fldM = INSN(3,0);
12768         opc  = INSN(7,6);
12769         regD = (bitD << 4) | fldD;
12770         regM = (bitM << 4) | fldM;
12771      }
12772      if ((regD & 1) == 1 || (regM & 1) == 1)
12773         gate = False;
12774
12775      if (gate) {
12776         if (isT) {
12777            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
12778         }
12779         /* In ARM mode, this is statically unconditional.  In Thumb mode,
12780            this must be dynamically unconditional, and we've SIGILLd if not.
12781            In either case we can create unconditional IR. */
12782         IRTemp op1 = newTemp(Ity_V128);
12783         IRTemp op2 = newTemp(Ity_V128);
12784         IRTemp src = newTemp(Ity_V128);
12785         IRTemp res = newTemp(Ity_V128);
12786         assign(op1,  getQReg(regD >> 1));
12787         assign(op2,  getQReg(regM >> 1));
12788         assign(src,  opc == BITS2(0,0) || opc == BITS2(0,1)
12789                         ? binop(Iop_XorV128, mkexpr(op1), mkexpr(op2))
12790                         : mkexpr(op2));
12791
12792         void* helpers[4]
12793            = { &armg_dirtyhelper_AESE,  &armg_dirtyhelper_AESD,
12794                &armg_dirtyhelper_AESMC, &armg_dirtyhelper_AESIMC };
12795         const HChar* hNames[4]
12796            = { "armg_dirtyhelper_AESE",  "armg_dirtyhelper_AESD",
12797                "armg_dirtyhelper_AESMC", "armg_dirtyhelper_AESIMC" };
12798         const HChar* iNames[4]
12799            = { "aese", "aesd", "aesmc", "aesimc" };
12800
12801         vassert(opc >= 0 && opc <= 3);
12802         void*        helper = helpers[opc];
12803         const HChar* hname  = hNames[opc];
12804
12805         IRTemp w32_3, w32_2, w32_1, w32_0;
12806         w32_3 = w32_2 = w32_1 = w32_0 = IRTemp_INVALID;
12807         breakupV128to32s( src, &w32_3, &w32_2, &w32_1, &w32_0 );
12808
12809         IRDirty* di
12810           = unsafeIRDirty_1_N( res, 0/*regparms*/, hname, helper,
12811                                mkIRExprVec_5(
12812                                   IRExpr_VECRET(),
12813                                   mkexpr(w32_3), mkexpr(w32_2),
12814                                   mkexpr(w32_1), mkexpr(w32_0)) );
12815         stmt(IRStmt_Dirty(di));
12816
12817         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
12818         DIP("%s.8 q%u, q%u\n", iNames[opc], regD >> 1, regM >> 1);
12819         return True;
12820      }
12821      /* fall through */
12822    }
12823
12824    /* ----------- SHA 3-reg insns q_q_q ----------- */
12825    /*
12826           31   27   23      19 15 11   7       3
12827       T1: 1110 1111 0  D 00 n  d  1100 N Q M 0 m  SHA1C Qd, Qn, Qm  ix=0
12828       A1: 1111 0010 ----------------------------
12829
12830       T1: 1110 1111 0  D 01 n  d  1100 N Q M 0 m  SHA1P Qd, Qn, Qm  ix=1
12831       A1: 1111 0010 ----------------------------
12832
12833       T1: 1110 1111 0  D 10 n  d  1100 N Q M 0 m  SHA1M Qd, Qn, Qm  ix=2
12834       A1: 1111 0010 ----------------------------
12835
12836       T1: 1110 1111 0  D 11 n  d  1100 N Q M 0 m  SHA1SU0 Qd, Qn, Qm  ix=3
12837       A1: 1111 0010 ----------------------------
12838       (that's a complete set of 4, based on insn[21,20])
12839
12840       T1: 1111 1111 0  D 00 n  d  1100 N Q M 0 m  SHA256H Qd, Qn, Qm  ix=4
12841       A1: 1111 0011 ----------------------------
12842
12843       T1: 1111 1111 0  D 01 n  d  1100 N Q M 0 m  SHA256H2 Qd, Qn, Qm  ix=5
12844       A1: 1111 0011 ----------------------------
12845
12846       T1: 1111 1111 0  D 10 n  d  1100 N Q M 0 m  SHA256SU1 Qd, Qn, Qm  ix=6
12847       A1: 1111 0011 ----------------------------
12848       (3/4 of a complete set of 4, based on insn[21,20])
12849
12850       Q must be 1.  Same comments about conditionalisation as for the AES
12851       group above apply.
12852    */
12853    {
12854      UInt ix = 8; /* invalid */
12855      Bool gate = False;
12856
12857      UInt hi9_sha1   = isT ? BITS9(1,1,1,0,1,1,1,1,0)
12858                            : BITS9(1,1,1,1,0,0,1,0,0);
12859      UInt hi9_sha256 = isT ? BITS9(1,1,1,1,1,1,1,1,0)
12860                            : BITS9(1,1,1,1,0,0,1,1,0);
12861      if ((INSN(31,23) == hi9_sha1 || INSN(31,23) == hi9_sha256)
12862          && INSN(11,8) == BITS4(1,1,0,0)
12863          && INSN(6,6) == 1 && INSN(4,4) == 0) {
12864         ix = INSN(21,20);
12865         if (INSN(31,23) == hi9_sha256)
12866            ix |= 4;
12867         if (ix < 7)
12868            gate = True;
12869      }
12870
12871      UInt regN = (INSN(7,7)   << 4)  | INSN(19,16);
12872      UInt regD = (INSN(22,22) << 4)  | INSN(15,12);
12873      UInt regM = (INSN(5,5)   << 4)  | INSN(3,0);
12874      if ((regD & 1) == 1 || (regM & 1) == 1 || (regN & 1) == 1)
12875         gate = False;
12876
12877      if (gate) {
12878         vassert(ix >= 0 && ix < 7);
12879         const HChar* inames[7]
12880            = { "sha1c", "sha1p", "sha1m", "sha1su0",
12881                "sha256h", "sha256h2", "sha256su1" };
12882         void(*helpers[7])(V128*,UInt,UInt,UInt,UInt,UInt,UInt,
12883                                 UInt,UInt,UInt,UInt,UInt,UInt)
12884            = { &armg_dirtyhelper_SHA1C,    &armg_dirtyhelper_SHA1P,
12885                &armg_dirtyhelper_SHA1M,    &armg_dirtyhelper_SHA1SU0,
12886                &armg_dirtyhelper_SHA256H,  &armg_dirtyhelper_SHA256H2,
12887                &armg_dirtyhelper_SHA256SU1 };
12888         const HChar* hnames[7]
12889            = { "armg_dirtyhelper_SHA1C",    "armg_dirtyhelper_SHA1P",
12890                "armg_dirtyhelper_SHA1M",    "armg_dirtyhelper_SHA1SU0",
12891                "armg_dirtyhelper_SHA256H",  "armg_dirtyhelper_SHA256H2",
12892                "armg_dirtyhelper_SHA256SU1" };
12893
12894         /* This is a really lame way to implement this, even worse than
12895            the arm64 version.  But at least it works. */
12896
12897         if (isT) {
12898            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
12899         }
12900
12901         IRTemp vD = newTemp(Ity_V128);
12902         IRTemp vN = newTemp(Ity_V128);
12903         IRTemp vM = newTemp(Ity_V128);
12904         assign(vD,  getQReg(regD >> 1));
12905         assign(vN,  getQReg(regN >> 1));
12906         assign(vM,  getQReg(regM >> 1));
12907
12908         IRTemp d32_3, d32_2, d32_1, d32_0;
12909         d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
12910         breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
12911
12912         IRTemp n32_3_pre, n32_2_pre, n32_1_pre, n32_0_pre;
12913         n32_3_pre = n32_2_pre = n32_1_pre = n32_0_pre = IRTemp_INVALID;
12914         breakupV128to32s( vN, &n32_3_pre, &n32_2_pre, &n32_1_pre, &n32_0_pre );
12915
12916         IRTemp m32_3, m32_2, m32_1, m32_0;
12917         m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
12918         breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
12919
12920         IRTemp n32_3 = newTemp(Ity_I32);
12921         IRTemp n32_2 = newTemp(Ity_I32);
12922         IRTemp n32_1 = newTemp(Ity_I32);
12923         IRTemp n32_0 = newTemp(Ity_I32);
12924
12925         /* Mask off any bits of the N register operand that aren't actually
12926            needed, so that Memcheck doesn't complain unnecessarily. */
12927         switch (ix) {
12928            case 0: case 1: case 2:
12929               assign(n32_3, mkU32(0));
12930               assign(n32_2, mkU32(0));
12931               assign(n32_1, mkU32(0));
12932               assign(n32_0, mkexpr(n32_0_pre));
12933               break;
12934            case 3: case 4: case 5: case 6:
12935               assign(n32_3, mkexpr(n32_3_pre));
12936               assign(n32_2, mkexpr(n32_2_pre));
12937               assign(n32_1, mkexpr(n32_1_pre));
12938               assign(n32_0, mkexpr(n32_0_pre));
12939               break;
12940            default:
12941               vassert(0);
12942         }
12943
12944         IRExpr** argvec
12945            = mkIRExprVec_13(
12946                 IRExpr_VECRET(),
12947                 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
12948                 mkexpr(n32_3), mkexpr(n32_2), mkexpr(n32_1), mkexpr(n32_0),
12949                 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
12950              );
12951
12952         IRTemp res = newTemp(Ity_V128);
12953         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
12954                                          hnames[ix], helpers[ix], argvec );
12955         stmt(IRStmt_Dirty(di));
12956         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
12957
12958         DIP("%s.8 q%u, q%u, q%u\n",
12959             inames[ix], regD >> 1, regN >> 1, regM >> 1);
12960         return True;
12961      }
12962      /* fall through */
12963    }
12964
12965    /* ----------- SHA1SU1, SHA256SU0 ----------- */
12966    /*
12967           31   27   23  21 19   15 11   7      3
12968       T1: 1111 1111 1 D 11 1010 d  0011 10 M 0 m  SHA1SU1 Qd, Qm
12969       A1: 1111 0011 ----------------------------
12970
12971       T1: 1111 1111 1 D 11 1010 d  0011 11 M 0 m  SHA256SU0 Qd, Qm
12972       A1: 1111 0011 ----------------------------
12973
12974       Same comments about conditionalisation as for the AES group above apply.
12975    */
12976    {
12977      Bool gate = False;
12978
12979      UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
12980      if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,1,0)
12981          && INSN(11,7) == BITS5(0,0,1,1,1) && INSN(4,4) == 0) {
12982         gate = True;
12983      }
12984
12985      UInt regD = (INSN(22,22) << 4) | INSN(15,12);
12986      UInt regM = (INSN(5,5)   << 4) | INSN(3,0);
12987      if ((regD & 1) == 1 || (regM & 1) == 1)
12988         gate = False;
12989
12990      Bool is_1SU1 = INSN(6,6) == 0;
12991
12992      if (gate) {
12993         const HChar* iname
12994            = is_1SU1 ? "sha1su1" : "sha256su0";
12995         void (*helper)(V128*,UInt,UInt,UInt,UInt,UInt,UInt,UInt,UInt)
12996            = is_1SU1 ? &armg_dirtyhelper_SHA1SU1
12997                      : *armg_dirtyhelper_SHA256SU0;
12998         const HChar* hname
12999            = is_1SU1 ? "armg_dirtyhelper_SHA1SU1"
13000                      : "armg_dirtyhelper_SHA256SU0";
13001
13002         if (isT) {
13003            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13004         }
13005
13006         IRTemp vD = newTemp(Ity_V128);
13007         IRTemp vM = newTemp(Ity_V128);
13008         assign(vD,  getQReg(regD >> 1));
13009         assign(vM,  getQReg(regM >> 1));
13010
13011         IRTemp d32_3, d32_2, d32_1, d32_0;
13012         d32_3 = d32_2 = d32_1 = d32_0 = IRTemp_INVALID;
13013         breakupV128to32s( vD, &d32_3, &d32_2, &d32_1, &d32_0 );
13014
13015         IRTemp m32_3, m32_2, m32_1, m32_0;
13016         m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
13017         breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
13018
13019         IRExpr** argvec
13020            = mkIRExprVec_9(
13021                 IRExpr_VECRET(),
13022                 mkexpr(d32_3), mkexpr(d32_2), mkexpr(d32_1), mkexpr(d32_0),
13023                 mkexpr(m32_3), mkexpr(m32_2), mkexpr(m32_1), mkexpr(m32_0)
13024              );
13025
13026         IRTemp res = newTemp(Ity_V128);
13027         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13028                                          hname, helper, argvec );
13029         stmt(IRStmt_Dirty(di));
13030         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13031
13032         DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
13033         return True;
13034      }
13035      /* fall through */
13036    }
13037
13038    /* ----------- SHA1H ----------- */
13039    /*
13040           31   27   23  21 19   15 11   7      3
13041       T1: 1111 1111 1 D 11 1001 d  0010 11 M 0 m  SHA1H Qd, Qm
13042       A1: 1111 0011 ----------------------------
13043
13044       Same comments about conditionalisation as for the AES group above apply.
13045    */
13046    {
13047      Bool gate = False;
13048
13049      UInt hi9 = isT ? BITS9(1,1,1,1,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,1,1);
13050      if (INSN(31,23) == hi9 && INSN(21,16) == BITS6(1,1,1,0,0,1)
13051          && INSN(11,6) == BITS6(0,0,1,0,1,1) && INSN(4,4) == 0) {
13052         gate = True;
13053      }
13054
13055      UInt regD = (INSN(22,22) << 4) | INSN(15,12);
13056      UInt regM = (INSN(5,5)   << 4) | INSN(3,0);
13057      if ((regD & 1) == 1 || (regM & 1) == 1)
13058         gate = False;
13059
13060      if (gate) {
13061         const HChar* iname = "sha1h";
13062         void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_SHA1H;
13063         const HChar* hname                        = "armg_dirtyhelper_SHA1H";
13064
13065         if (isT) {
13066            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13067         }
13068
13069         IRTemp vM = newTemp(Ity_V128);
13070         assign(vM,  getQReg(regM >> 1));
13071
13072         IRTemp m32_3, m32_2, m32_1, m32_0;
13073         m32_3 = m32_2 = m32_1 = m32_0 = IRTemp_INVALID;
13074         breakupV128to32s( vM, &m32_3, &m32_2, &m32_1, &m32_0 );
13075         /* m32_3, m32_2, m32_1 are just abandoned.  No harm; iropt will
13076            remove them. */
13077
13078         IRExpr*  zero   = mkU32(0);
13079         IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
13080                                         zero, zero, zero, mkexpr(m32_0));
13081
13082         IRTemp res = newTemp(Ity_V128);
13083         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13084                                          hname, helper, argvec );
13085         stmt(IRStmt_Dirty(di));
13086         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13087
13088         DIP("%s.8 q%u, q%u\n", iname, regD >> 1, regM >> 1);
13089         return True;
13090      }
13091      /* fall through */
13092    }
13093
13094    /* ----------- VMULL.P64 ----------- */
13095    /*
13096           31   27   23  21 19 15 11   7       3
13097       T2: 1110 1111 1 D 10 n  d  1110 N 0 M 0 m
13098       A2: 1111 0010 -------------------------
13099
13100       The ARM documentation is pretty difficult to follow here.
13101       Same comments about conditionalisation as for the AES group above apply.
13102    */
13103    {
13104      Bool gate = False;
13105
13106      UInt hi9 = isT ? BITS9(1,1,1,0,1,1,1,1,1) : BITS9(1,1,1,1,0,0,1,0,1);
13107      if (INSN(31,23) == hi9 && INSN(21,20) == BITS2(1,0)
13108          && INSN(11,8) == BITS4(1,1,1,0)
13109          && INSN(6,6) == 0 && INSN(4,4) == 0) {
13110         gate = True;
13111      }
13112
13113      UInt regN = (INSN(7,7)   << 4)  | INSN(19,16);
13114      UInt regD = (INSN(22,22) << 4)  | INSN(15,12);
13115      UInt regM = (INSN(5,5)   << 4)  | INSN(3,0);
13116
13117      if ((regD & 1) == 1)
13118         gate = False;
13119
13120      if (gate) {
13121         const HChar* iname = "vmull";
13122         void (*helper)(V128*,UInt,UInt,UInt,UInt) = &armg_dirtyhelper_VMULLP64;
13123         const HChar* hname                        = "armg_dirtyhelper_VMULLP64";
13124
13125         if (isT) {
13126            gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13127         }
13128
13129         IRTemp srcN = newTemp(Ity_I64);
13130         IRTemp srcM = newTemp(Ity_I64);
13131         assign(srcN, getDRegI64(regN));
13132         assign(srcM, getDRegI64(regM));
13133
13134         IRExpr** argvec = mkIRExprVec_5(IRExpr_VECRET(),
13135                                         unop(Iop_64HIto32, mkexpr(srcN)),
13136                                         unop(Iop_64to32,   mkexpr(srcN)),
13137                                         unop(Iop_64HIto32, mkexpr(srcM)),
13138                                         unop(Iop_64to32, mkexpr(srcM)));
13139
13140         IRTemp res = newTemp(Ity_V128);
13141         IRDirty* di = unsafeIRDirty_1_N( res, 0/*regparms*/,
13142                                          hname, helper, argvec );
13143         stmt(IRStmt_Dirty(di));
13144         putQReg(regD >> 1, mkexpr(res), IRTemp_INVALID);
13145
13146         DIP("%s.p64 q%u, q%u, w%u\n", iname, regD >> 1, regN, regM);
13147         return True;
13148      }
13149      /* fall through */
13150    }
13151
13152    /* ----------- LDA{,B,H}, STL{,B,H} ----------- */
13153    /*     31   27   23   19   15 11   7    3
13154       A1: cond 0001 1001  n    t 1100 1001 1111  LDA  Rt, [Rn]
13155       A1: cond 0001 1111  n    t 1100 1001 1111  LDAH Rt, [Rn]
13156       A1: cond 0001 1101  n    t 1100 1001 1111  LDAB Rt, [Rn]
13157
13158       A1: cond 0001 1000  n 1111 1100 1001    t  STL  Rt, [Rn]
13159       A1: cond 0001 1110  n 1111 1100 1001    t  STLH Rt, [Rn]
13160       A1: cond 0001 1100  n 1111 1100 1001    t  STLB Rt, [Rn]
13161
13162       T1: 1110 1000 1101  n    t 1111 1010 1111  LDA  Rt, [Rn]
13163       T1: 1110 1000 1101  n    t 1111 1001 1111  LDAH Rt, [Rn]
13164       T1: 1110 1000 1101  n    t 1111 1000 1111  LDAB Rt, [Rn]
13165
13166       T1: 1110 1000 1100  n    t 1111 1010 1111  STL  Rt, [Rn]
13167       T1: 1110 1000 1100  n    t 1111 1001 1111  STLH Rt, [Rn]
13168       T1: 1110 1000 1100  n    t 1111 1000 1111  STLB Rt, [Rn]
13169    */
13170    {
13171      UInt nn     = 16; // invalid
13172      UInt tt     = 16; // invalid
13173      UInt szBlg2 = 4;  // invalid
13174      Bool isLoad = False;
13175      Bool gate   = False;
13176      if (isT) {
13177         if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
13178             && INSN(11,6) == BITS6(1,1,1,1,1,0)
13179             && INSN(3,0) == BITS4(1,1,1,1)) {
13180            nn     = INSN(19,16);
13181            tt     = INSN(15,12);
13182            isLoad = INSN(20,20) == 1;
13183            szBlg2 = INSN(5,4); // 00:B 01:H 10:W 11:invalid
13184            gate   = szBlg2 != BITS2(1,1) && tt != 15 && nn != 15;
13185         }
13186      } else {
13187         if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
13188             && INSN(11,0) == BITS12(1,1,0,0,1,0,0,1,1,1,1,1)) {
13189            nn     = INSN(19,16);
13190            tt     = INSN(15,12);
13191            isLoad = True;
13192            szBlg2     = INSN(22,21); // 10:B 11:H 00:W 01:invalid
13193            gate   = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
13194         }
13195         else
13196         if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
13197             && INSN(15,4) == BITS12(1,1,1,1,1,1,0,0,1,0,0,1)) {
13198            nn     = INSN(19,16);
13199            tt     = INSN(3,0);
13200            isLoad = False;
13201            szBlg2     = INSN(22,21);  // 10:B 11:H 00:W 01:invalid
13202            gate   = szBlg2 != BITS2(0,1) && tt != 15 && nn != 15;
13203         }
13204         if (gate) {
13205            // Rearrange szBlg2 bits to be the same as the Thumb case
13206            switch (szBlg2) {
13207               case 2: szBlg2 = 0; break;
13208               case 3: szBlg2 = 1; break;
13209               case 0: szBlg2 = 2; break;
13210               default: /*NOTREACHED*/vassert(0);
13211            }
13212         }
13213      }
13214      // For both encodings, the instruction is guarded by condT, which
13215      // is passed in by the caller.  Note that the the loads and stores
13216      // are conditional, so we don't have to truncate the IRSB at this
13217      // point, but the fence is unconditional.  There's no way to
13218      // represent a conditional fence without a side exit, but it
13219      // doesn't matter from a correctness standpoint that it is
13220      // unconditional -- it just loses a bit of performance in the
13221      // case where the condition doesn't hold.
13222      if (gate) {
13223         vassert(szBlg2 <= 2 && nn <= 14 && tt <= 14);
13224         IRExpr* ea = llGetIReg(nn);
13225         if (isLoad) {
13226            static IRLoadGOp cvt[3]
13227               = { ILGop_8Uto32, ILGop_16Uto32, ILGop_Ident32 };
13228            IRTemp data = newTemp(Ity_I32);
13229            loadGuardedLE(data, cvt[szBlg2], ea, mkU32(0)/*alt*/, condT);
13230            if (isT) {
13231               putIRegT(tt, mkexpr(data), condT);
13232            } else {
13233               putIRegA(tt, mkexpr(data), condT, Ijk_INVALID);
13234            }
13235            stmt(IRStmt_MBE(Imbe_Fence));
13236         } else {
13237            stmt(IRStmt_MBE(Imbe_Fence));
13238            IRExpr* data = llGetIReg(tt);
13239            switch (szBlg2) {
13240               case 0: data = unop(Iop_32to8,  data); break;
13241               case 1: data = unop(Iop_32to16, data); break;
13242               case 2: break;
13243               default: vassert(0);
13244            }
13245            storeGuardedLE(ea, data, condT);
13246         }
13247         const HChar* ldNames[3] = { "ldab", "ldah", "lda" };
13248         const HChar* stNames[3] = { "stlb", "stlh", "stl" };
13249         DIP("%s r%u, [r%u]", (isLoad ? ldNames : stNames)[szBlg2], tt, nn);
13250         return True;
13251      }
13252      /* else fall through */
13253    }
13254
13255    /* ----------- LDAEX{,B,H,D}, STLEX{,B,H,D} ----------- */
13256    /*     31   27   23   19 15 11   7    3
13257       A1: cond 0001 1101 n  t  1110 1001 1111  LDAEXB Rt, [Rn]
13258       A1: cond 0001 1111 n  t  1110 1001 1111  LDAEXH Rt, [Rn]
13259       A1: cond 0001 1001 n  t  1110 1001 1111  LDAEX  Rt, [Rn]
13260       A1: cond 0001 1011 n  t  1110 1001 1111  LDAEXD Rt, Rt+1, [Rn]
13261
13262       A1: cond 0001 1100 n  d  1110 1001 t     STLEXB Rd, Rt, [Rn]
13263       A1: cond 0001 1110 n  d  1110 1001 t     STLEXH Rd, Rt, [Rn]
13264       A1: cond 0001 1000 n  d  1110 1001 t     STLEX  Rd, Rt, [Rn]
13265       A1: cond 0001 1010 n  d  1110 1001 t     STLEXD Rd, Rt, Rt+1, [Rn]
13266
13267           31  28   24    19 15 11   7    3
13268       T1: 111 0100 01101 n  t  1111 1100 1111  LDAEXB Rt, [Rn]
13269       T1: 111 0100 01101 n  t  1111 1101 1111  LDAEXH Rt, [Rn]
13270       T1: 111 0100 01101 n  t  1111 1110 1111  LDAEX  Rt, [Rn]
13271       T1: 111 0100 01101 n  t  t2   1111 1111  LDAEXD Rt, Rt2, [Rn]
13272
13273       T1: 111 0100 01100 n  t  1111 1100 d     STLEXB Rd, Rt, [Rn]
13274       T1: 111 0100 01100 n  t  1111 1101 d     STLEXH Rd, Rt, [Rn]
13275       T1: 111 0100 01100 n  t  1111 1110 d     STLEX  Rd, Rt, [Rn]
13276       T1: 111 0100 01100 n  t  t2   1111 d     STLEXD Rd, Rt, Rt2, [Rn]
13277    */
13278    {
13279      UInt nn     = 16; // invalid
13280      UInt tt     = 16; // invalid
13281      UInt tt2    = 16; // invalid
13282      UInt dd     = 16; // invalid
13283      UInt szBlg2 = 4;  // invalid
13284      Bool isLoad = False;
13285      Bool gate   = False;
13286      if (isT) {
13287         if (INSN(31,21) == BITS11(1,1,1,0,1,0,0,0,1,1,0)
13288             && INSN(7,6) == BITS2(1,1)) {
13289            isLoad = INSN(20,20) == 1;
13290            nn     = INSN(19,16);
13291            tt     = INSN(15,12);
13292            tt2    = INSN(11,8);
13293            szBlg2 = INSN(5,4);
13294            dd     = INSN(3,0);
13295            gate   = True;
13296            if (szBlg2 < BITS2(1,1) && tt2 != BITS4(1,1,1,1)) gate = False;
13297            if (isLoad && dd != BITS4(1,1,1,1)) gate = False;
13298            // re-set not-used register values to invalid
13299            if (szBlg2 < BITS2(1,1)) tt2 = 16;
13300            if (isLoad) dd = 16;
13301         }
13302      } else {
13303         /* ARM encoding.  Do the load and store cases separately as
13304            the register numbers are in different places and a combined decode
13305            is too confusing. */
13306         if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 1
13307             && INSN(11,0) == BITS12(1,1,1,0,1,0,0,1,1,1,1,1)) {
13308            szBlg2 = INSN(22,21);
13309            isLoad = True;
13310            nn     = INSN(19,16);
13311            tt     = INSN(15,12);
13312            gate   = True;
13313         }
13314         else
13315         if (INSN(27,23) == BITS5(0,0,0,1,1) && INSN(20,20) == 0
13316             && INSN(11,4) == BITS8(1,1,1,0,1,0,0,1)) {
13317            szBlg2 = INSN(22,21);
13318            isLoad = False;
13319            nn     = INSN(19,16);
13320            dd     = INSN(15,12);
13321            tt     = INSN(3,0);
13322            gate   = True;
13323         }
13324         if (gate) {
13325            // Rearrange szBlg2 bits to be the same as the Thumb case
13326            switch (szBlg2) {
13327               case 2: szBlg2 = 0; break;
13328               case 3: szBlg2 = 1; break;
13329               case 0: szBlg2 = 2; break;
13330               case 1: szBlg2 = 3; break;
13331               default: /*NOTREACHED*/vassert(0);
13332            }
13333         }
13334      }
13335      // Perform further checks on register numbers
13336      if (gate) {
13337         /**/ if (isT && isLoad) {
13338            // Thumb load
13339            if (szBlg2 < 3) {
13340               if (! (tt != 13 && tt != 15 && nn != 15)) gate = False;
13341            } else {
13342               if (! (tt != 13 && tt != 15 && tt2 != 13 && tt2 != 15
13343                      && tt != tt2 && nn != 15)) gate = False;
13344            }
13345         }
13346         else if (isT && !isLoad) {
13347            // Thumb store
13348            if (szBlg2 < 3) {
13349               if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
13350                      && nn != 15 && dd != nn && dd != tt)) gate = False;
13351            } else {
13352               if (! (dd != 13 && dd != 15 && tt != 13 && tt != 15
13353                      && tt2 != 13 && tt2 != 15 && nn != 15 && dd != nn
13354                      && dd != tt && dd != tt2)) gate = False;
13355            }
13356         }
13357         else if (!isT && isLoad) {
13358            // ARM Load
13359            if (szBlg2 < 3) {
13360               if (! (tt != 15 && nn != 15)) gate = False;
13361            } else {
13362               if (! ((tt & 1) == 0 && tt != 14 && nn != 15)) gate = False;
13363               vassert(tt2 == 16/*invalid*/);
13364               tt2 = tt + 1;
13365            }
13366         }
13367         else if (!isT && !isLoad) {
13368            // ARM Store
13369            if (szBlg2 < 3) {
13370               if (! (dd != 15 && tt != 15 && nn != 15
13371                      && dd != nn && dd != tt)) gate = False;
13372            } else {
13373               if (! (dd != 15 && (tt & 1) == 0 && tt != 14 && nn != 15
13374                      && dd != nn && dd != tt && dd != tt+1)) gate = False;
13375               vassert(tt2 == 16/*invalid*/);
13376               tt2 = tt + 1;
13377            }
13378         }
13379         else /*NOTREACHED*/vassert(0);
13380      }
13381      if (gate) {
13382         // Paranoia ..
13383         vassert(szBlg2 <= 3);
13384         if (szBlg2 < 3) { vassert(tt2 == 16/*invalid*/); }
13385                    else { vassert(tt2 <= 14); }
13386         if (isLoad) { vassert(dd == 16/*invalid*/); }
13387                else { vassert(dd <= 14); }
13388      }
13389      // If we're still good even after all that, generate the IR.
13390      if (gate) {
13391         /* First, go unconditional.  Staying in-line is too complex. */
13392         if (isT) {
13393            vassert(condT != IRTemp_INVALID);
13394            mk_skip_over_T32_if_cond_is_false( condT );
13395         } else {
13396            if (condT != IRTemp_INVALID) {
13397               mk_skip_over_A32_if_cond_is_false( condT );
13398               condT = IRTemp_INVALID;
13399            }
13400         }
13401         /* Now the load or store. */
13402         IRType ty = Ity_INVALID; /* the type of the transferred data */
13403         const HChar* nm = NULL;
13404         switch (szBlg2) {
13405            case 0: nm = "b"; ty = Ity_I8;  break;
13406            case 1: nm = "h"; ty = Ity_I16; break;
13407            case 2: nm = "";  ty = Ity_I32; break;
13408            case 3: nm = "d"; ty = Ity_I64; break;
13409            default: vassert(0);
13410         }
13411         IRExpr* ea = isT ? getIRegT(nn) : getIRegA(nn);
13412         if (isLoad) {
13413            // LOAD.  Transaction, then fence.
13414            IROp widen = Iop_INVALID;
13415            switch (szBlg2) {
13416               case 0: widen = Iop_8Uto32;  break;
13417               case 1: widen = Iop_16Uto32; break;
13418               case 2: case 3: break;
13419               default: vassert(0);
13420            }
13421            IRTemp  res = newTemp(ty);
13422            // FIXME: assumes little-endian guest
13423            stmt( IRStmt_LLSC(Iend_LE, res, ea, NULL/*this is a load*/) );
13424
13425 #          define PUT_IREG(_nnz, _eez) \
13426               do { vassert((_nnz) <= 14); /* no writes to the PC */ \
13427                    if (isT) { putIRegT((_nnz), (_eez), IRTemp_INVALID); } \
13428                        else { putIRegA((_nnz), (_eez), \
13429                               IRTemp_INVALID, Ijk_Boring); } } while(0)
13430            if (ty == Ity_I64) {
13431               // FIXME: assumes little-endian guest
13432               PUT_IREG(tt,  unop(Iop_64to32, mkexpr(res)));
13433               PUT_IREG(tt2, unop(Iop_64HIto32, mkexpr(res)));
13434            } else {
13435               PUT_IREG(tt, widen == Iop_INVALID
13436                               ? mkexpr(res) : unop(widen, mkexpr(res)));
13437            }
13438            stmt(IRStmt_MBE(Imbe_Fence));
13439            if (ty == Ity_I64) {
13440               DIP("ldrex%s%s r%u, r%u, [r%u]\n",
13441                   nm, isT ? "" : nCC(conq), tt, tt2, nn);
13442            } else {
13443               DIP("ldrex%s%s r%u, [r%u]\n", nm, isT ? "" : nCC(conq), tt, nn);
13444            }
13445 #          undef PUT_IREG
13446         } else {
13447            // STORE.  Fence, then transaction.
13448            IRTemp resSC1, resSC32, data;
13449            IROp   narrow = Iop_INVALID;
13450            switch (szBlg2) {
13451               case 0: narrow = Iop_32to8; break;
13452               case 1: narrow = Iop_32to16; break;
13453               case 2: case 3: break;
13454               default: vassert(0);
13455            }
13456            stmt(IRStmt_MBE(Imbe_Fence));
13457            data = newTemp(ty);
13458 #          define GET_IREG(_nnz) (isT ? getIRegT(_nnz) : getIRegA(_nnz))
13459            assign(data,
13460                   ty == Ity_I64
13461                      // FIXME: assumes little-endian guest
13462                      ? binop(Iop_32HLto64, GET_IREG(tt2), GET_IREG(tt))
13463                      : narrow == Iop_INVALID
13464                         ? GET_IREG(tt)
13465                         : unop(narrow, GET_IREG(tt)));
13466 #          undef GET_IREG
13467            resSC1 = newTemp(Ity_I1);
13468            // FIXME: assumes little-endian guest
13469            stmt( IRStmt_LLSC(Iend_LE, resSC1, ea, mkexpr(data)) );
13470
13471            /* Set rDD to 1 on failure, 0 on success.  Currently we have
13472               resSC1 == 0 on failure, 1 on success. */
13473            resSC32 = newTemp(Ity_I32);
13474            assign(resSC32,
13475                   unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
13476            vassert(dd <= 14); /* no writes to the PC */
13477            if (isT) {
13478               putIRegT(dd, mkexpr(resSC32), IRTemp_INVALID);
13479            } else {
13480               putIRegA(dd, mkexpr(resSC32), IRTemp_INVALID, Ijk_Boring);
13481            }
13482            if (ty == Ity_I64) {
13483               DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
13484                   nm, isT ? "" : nCC(conq), dd, tt, tt2, nn);
13485            } else {
13486               DIP("strex%s%s r%u, r%u, [r%u]\n",
13487                   nm, isT ? "" : nCC(conq), dd, tt, nn);
13488            }
13489         } /* if (isLoad) */
13490         return True;
13491      } /* if (gate) */
13492      /* else fall through */
13493    }
13494
13495    /* ----------- VSEL<c>.F64 d_d_d, VSEL<c>.F32 s_s_s ----------- */
13496    /*        31   27    22 21 19 15 11  8 7 6 5 4 3
13497       T1/A1: 1111 11100 D  cc n  d  101 1 N 0 M 0 m  VSEL<c>.F64 Dd, Dn, Dm
13498       T1/A1: 1111 11100 D  cc n  d  101 0 N 0 M 0 m  VSEL<c>.F32 Sd, Sn, Sm
13499
13500       ARM encoding is in NV space.
13501       In Thumb mode, we must not be in an IT block.
13502    */
13503    if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,0) && INSN(11,9) == BITS3(1,0,1)
13504        && INSN(6,6) == 0 && INSN(4,4) == 0) {
13505       UInt bit_D  = INSN(22,22);
13506       UInt fld_cc = INSN(21,20);
13507       UInt fld_n  = INSN(19,16);
13508       UInt fld_d  = INSN(15,12);
13509       Bool isF64  = INSN(8,8) == 1;
13510       UInt bit_N  = INSN(7,7);
13511       UInt bit_M  = INSN(5,5);
13512       UInt fld_m  = INSN(3,0);
13513
13514       UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D);
13515       UInt nn = isF64 ? ((bit_N << 4) | fld_n) : ((fld_n << 1) | bit_N);
13516       UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
13517
13518       UInt cc_1 = (fld_cc >> 1) & 1;
13519       UInt cc_0 = (fld_cc >> 0) & 1;
13520       UInt cond = (fld_cc << 2) | ((cc_1 ^ cc_0) << 1) | 0;
13521
13522       if (isT) {
13523          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13524       }
13525       /* In ARM mode, this is statically unconditional.  In Thumb mode,
13526          this must be dynamically unconditional, and we've SIGILLd if not.
13527          In either case we can create unconditional IR. */
13528
13529       IRTemp guard = newTemp(Ity_I32);
13530       assign(guard, mk_armg_calculate_condition(cond));
13531       IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
13532       IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13533       IRExpr* res  = IRExpr_ITE(unop(Iop_32to1, mkexpr(guard)), srcN, srcM);
13534       (isF64 ? llPutDReg : llPutFReg)(dd, res);
13535
13536       UChar rch = isF64 ? 'd' : 'f';
13537       DIP("vsel%s.%s %c%u, %c%u, %c%u\n",
13538           nCC(cond), isF64 ? "f64" : "f32", rch, dd, rch, nn, rch, mm);
13539       return True;
13540    }
13541
13542    /* -------- VRINT{A,N,P,M}.F64 d_d, VRINT{A,N,P,M}.F32 s_s -------- */
13543    /*        31        22 21   17 15 11  8 7  5 4 3
13544       T1/A1: 111111101 D  1110 rm Vd 101 1 01 M 0 Vm VRINT{A,N,P,M}.F64 Dd, Dm
13545       T1/A1: 111111101 D  1110 rm Vd 101 0 01 M 0 Vm VRINT{A,N,P,M}.F32 Sd, Sm
13546
13547       ARM encoding is in NV space.
13548       In Thumb mode, we must not be in an IT block.
13549    */
13550    if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1)
13551        && INSN(21,18) == BITS4(1,1,1,0) && INSN(11,9) == BITS3(1,0,1)
13552        && INSN(7,6) == BITS2(0,1) && INSN(4,4) == 0) {
13553       UInt bit_D  = INSN(22,22);
13554       UInt fld_rm = INSN(17,16);
13555       UInt fld_d  = INSN(15,12);
13556       Bool isF64  = INSN(8,8) == 1;
13557       UInt bit_M  = INSN(5,5);
13558       UInt fld_m  = INSN(3,0);
13559
13560       UInt dd = isF64 ? ((bit_D << 4) | fld_d) : ((fld_d << 1) | bit_D);
13561       UInt mm = isF64 ? ((bit_M << 4) | fld_m) : ((fld_m << 1) | bit_M);
13562
13563       if (isT) {
13564          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13565       }
13566       /* In ARM mode, this is statically unconditional.  In Thumb mode,
13567          this must be dynamically unconditional, and we've SIGILLd if not.
13568          In either case we can create unconditional IR. */
13569
13570       UChar c = '?';
13571       IRRoundingMode rm = Irrm_NEAREST;
13572       switch (fld_rm) {
13573          /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13574             kludge since it doesn't take into account the nearest-even vs
13575             nearest-away semantics. */
13576          case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
13577          case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
13578          case BITS2(1,0): c = 'p'; rm = Irrm_PosINF;  break;
13579          case BITS2(1,1): c = 'm'; rm = Irrm_NegINF;  break;
13580          default: vassert(0);
13581       }
13582
13583       IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13584       IRExpr* res  = binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13585                            mkU32((UInt)rm), srcM);
13586       (isF64 ? llPutDReg : llPutFReg)(dd, res);
13587
13588       UChar rch = isF64 ? 'd' : 'f';
13589       DIP("vrint%c.%s.%s %c%u, %c%u\n",
13590           c, isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13591       return True;
13592    }
13593
13594    /* -------- VRINT{Z,R}.F64.F64 d_d, VRINT{Z,R}.F32.F32 s_s -------- */
13595    /*     31   27    22 21     15 11   7  6 5 4 3
13596       T1: 1110 11101 D  110110 Vd 1011 op 1 M 0 Vm VRINT<r><c>.F64.F64 Dd, Dm
13597       A1: cond 11101 D  110110 Vd 1011 op 1 M 0 Vm
13598
13599       T1: 1110 11101 D  110110 Vd 1010 op 1 M 0 Vm VRINT<r><c>.F32.F32 Sd, Sm
13600       A1: cond 11101 D  110110 Vd 1010 op 1 M 0 Vm
13601
13602       In contrast to the VRINT variants just above, this can be conditional.
13603    */
13604    if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
13605        && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,0)
13606        && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) {
13607       UInt bit_D   = INSN(22,22);
13608       UInt fld_Vd  = INSN(15,12);
13609       Bool isF64   = INSN(8,8) == 1;
13610       Bool rToZero = INSN(7,7) == 1;
13611       UInt bit_M   = INSN(5,5);
13612       UInt fld_Vm  = INSN(3,0);
13613       UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13614       UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13615
13616       if (isT) vassert(condT != IRTemp_INVALID);
13617       IRType ty  = isF64 ? Ity_F64 : Ity_F32;
13618       IRTemp src = newTemp(ty);
13619       IRTemp res = newTemp(ty);
13620       assign(src, (isF64 ? getDReg : getFReg)(mm));
13621
13622       IRTemp rm = newTemp(Ity_I32);
13623       assign(rm, rToZero ? mkU32(Irrm_ZERO)
13624                          : mkexpr(mk_get_IR_rounding_mode()));
13625       assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13626                         mkexpr(rm), mkexpr(src)));
13627       (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT);
13628
13629       UChar rch = isF64 ? 'd' : 'f';
13630       DIP("vrint%c.%s.%s %c%u, %c%u\n",
13631           rToZero ? 'z' : 'r',
13632           isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13633       return True;
13634    }
13635
13636    /* ----------- VCVT{A,N,P,M}{.S32,.U32}{.F64,.F32} ----------- */
13637    /*        31   27    22 21   17 15 11  8  7  6 5 4 3
13638       T1/A1: 1111 11101 D  1111 rm Vd 101 sz op 1 M 0 Vm
13639              VCVT{A,N,P,M}{.S32,.U32}.F64 Sd, Dm
13640              VCVT{A,N,P,M}{.S32,.U32}.F32 Sd, Sm
13641
13642       ARM encoding is in NV space.
13643       In Thumb mode, we must not be in an IT block.
13644    */
13645    if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,18) == BITS4(1,1,1,1)
13646        && INSN(11,9) == BITS3(1,0,1) && INSN(6,6) == 1 && INSN(4,4) == 0) {
13647       UInt bit_D  = INSN(22,22);
13648       UInt fld_rm = INSN(17,16);
13649       UInt fld_Vd = INSN(15,12);
13650       Bool isF64  = INSN(8,8) == 1;
13651       Bool isU    = INSN(7,7) == 0;
13652       UInt bit_M  = INSN(5,5);
13653       UInt fld_Vm = INSN(3,0);
13654
13655       UInt dd = (fld_Vd << 1) | bit_D;
13656       UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13657
13658       if (isT) {
13659          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13660       }
13661       /* In ARM mode, this is statically unconditional.  In Thumb mode,
13662          this must be dynamically unconditional, and we've SIGILLd if not.
13663          In either case we can create unconditional IR. */
13664
13665       UChar c = '?';
13666       IRRoundingMode rm = Irrm_NEAREST;
13667       switch (fld_rm) {
13668          /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13669             kludge since it doesn't take into account the nearest-even vs
13670             nearest-away semantics. */
13671          case BITS2(0,0): c = 'a'; rm = Irrm_NEAREST; break;
13672          case BITS2(0,1): c = 'n'; rm = Irrm_NEAREST; break;
13673          case BITS2(1,0): c = 'p'; rm = Irrm_PosINF;  break;
13674          case BITS2(1,1): c = 'm'; rm = Irrm_NegINF;  break;
13675          default: vassert(0);
13676       }
13677
13678       IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13679       IRTemp   res = newTemp(Ity_I32);
13680
13681       /* The arm back end doesn't support use of Iop_F32toI32U or
13682          Iop_F32toI32S, so for those cases we widen the F32 to F64
13683          and then follow the F64 route. */
13684       if (!isF64) {
13685          srcM = unop(Iop_F32toF64, srcM);
13686       }
13687       assign(res, binop(isU ? Iop_F64toI32U : Iop_F64toI32S,
13688                         mkU32((UInt)rm), srcM));
13689
13690       llPutFReg(dd, unop(Iop_ReinterpI32asF32, mkexpr(res)));
13691
13692       UChar rch = isF64 ? 'd' : 'f';
13693       DIP("vcvt%c.%s.%s %c%u, %c%u\n",
13694           c, isU ? "u32" : "s32", isF64 ? "f64" : "f32", 's', dd, rch, mm);
13695       return True;
13696    }
13697
13698    /* ----------- V{MAX,MIN}NM{.F64 d_d_d, .F32 s_s_s} ----------- */
13699    /* 31   27    22 21 19 15 11  8 7 6  5 4 3
13700       1111 11101 D  00 Vn Vd 101 1 N op M 0 Vm  V{MIN,MAX}NM.F64 Dd, Dn, Dm
13701       1111 11101 D  00 Vn Vd 101 0 N op M 0 Vm  V{MIN,MAX}NM.F32 Sd, Sn, Sm
13702
13703       ARM encoding is in NV space.
13704       In Thumb mode, we must not be in an IT block.
13705    */
13706    if (INSN(31,23) == BITS9(1,1,1,1,1,1,1,0,1) && INSN(21,20) == BITS2(0,0)
13707        && INSN(11,9) == BITS3(1,0,1) && INSN(4,4) == 0) {
13708       UInt bit_D  = INSN(22,22);
13709       UInt fld_Vn = INSN(19,16);
13710       UInt fld_Vd = INSN(15,12);
13711       Bool isF64  = INSN(8,8) == 1;
13712       UInt bit_N  = INSN(7,7);
13713       Bool isMAX  = INSN(6,6) == 0;
13714       UInt bit_M  = INSN(5,5);
13715       UInt fld_Vm = INSN(3,0);
13716
13717       UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13718       UInt nn = isF64 ? ((bit_N << 4) | fld_Vn) : ((fld_Vn << 1) | bit_N);
13719       UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13720
13721       if (isT) {
13722          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13723       }
13724       /* In ARM mode, this is statically unconditional.  In Thumb mode,
13725          this must be dynamically unconditional, and we've SIGILLd if not.
13726          In either case we can create unconditional IR. */
13727
13728       IROp op = isF64 ? (isMAX ? Iop_MaxNumF64 : Iop_MinNumF64)
13729                       : (isMAX ? Iop_MaxNumF32 : Iop_MinNumF32);
13730       IRExpr* srcN = (isF64 ? llGetDReg : llGetFReg)(nn);
13731       IRExpr* srcM = (isF64 ? llGetDReg : llGetFReg)(mm);
13732       IRExpr* res  = binop(op, srcN, srcM);
13733       (isF64 ? llPutDReg : llPutFReg)(dd, res);
13734
13735       UChar rch = isF64 ? 'd' : 'f';
13736       DIP("v%snm.%s %c%u, %c%u, %c%u\n",
13737           isMAX ? "max" : "min", isF64 ? "f64" : "f32",
13738           rch, dd, rch, nn, rch, mm);
13739       return True;
13740    }
13741
13742    /* ----------- VRINTX.F64.F64 d_d, VRINTX.F32.F32 s_s ----------- */
13743    /*     31   27    22 21     15 11  8 7  5 4 3
13744       T1: 1110 11101 D  110111 Vd 101 1 01 M 0 Vm VRINTX<c>.F64.F64 Dd, Dm
13745       A1: cond 11101 D  110111 Vd 101 1 01 M 0 Vm
13746
13747       T1: 1110 11101 D  110111 Vd 101 0 01 M 0 Vm VRINTX<c>.F32.F32 Dd, Dm
13748       A1: cond 11101 D  110111 Vd 101 0 01 M 0 Vm
13749
13750       Like VRINT{Z,R}{.F64.F64, .F32.F32} just above, this can be conditional.
13751       This produces the same code as the VRINTR case since we ignore the
13752       requirement to signal inexactness.
13753    */
13754    if ((isT ? (INSN(31,28) == BITS4(1,1,1,0)) : True)
13755        && INSN(27,23) == BITS5(1,1,1,0,1) && INSN(21,16) == BITS6(1,1,0,1,1,1)
13756        && INSN(11,9) == BITS3(1,0,1) && INSN(7,6) == BITS2(0,1)
13757        && INSN(4,4) == 0) {
13758       UInt bit_D  = INSN(22,22);
13759       UInt fld_Vd = INSN(15,12);
13760       Bool isF64  = INSN(8,8) == 1;
13761       UInt bit_M  = INSN(5,5);
13762       UInt fld_Vm = INSN(3,0);
13763       UInt dd = isF64 ? ((bit_D << 4) | fld_Vd) : ((fld_Vd << 1) | bit_D);
13764       UInt mm = isF64 ? ((bit_M << 4) | fld_Vm) : ((fld_Vm << 1) | bit_M);
13765
13766       if (isT) vassert(condT != IRTemp_INVALID);
13767       IRType ty  = isF64 ? Ity_F64 : Ity_F32;
13768       IRTemp src = newTemp(ty);
13769       IRTemp res = newTemp(ty);
13770       assign(src, (isF64 ? getDReg : getFReg)(mm));
13771
13772       IRTemp rm = newTemp(Ity_I32);
13773       assign(rm, mkexpr(mk_get_IR_rounding_mode()));
13774       assign(res, binop(isF64 ? Iop_RoundF64toInt : Iop_RoundF32toInt,
13775                         mkexpr(rm), mkexpr(src)));
13776       (isF64 ? putDReg : putFReg)(dd, mkexpr(res), condT);
13777
13778       UChar rch = isF64 ? 'd' : 'f';
13779       DIP("vrint%c.%s.%s %c%u, %c%u\n",
13780           'x',
13781           isF64 ? "f64" : "f32", isF64 ? "f64" : "f32", rch, dd, rch, mm);
13782       return True;
13783    }
13784
13785    /* ----------- V{MAX,MIN}NM{.F32 d_d_d, .F32 q_q_q} ----------- */
13786    /*     31   27    22 21 20 19 15 11   7 6 5 4 3
13787       T1: 1111 11110 D  op 0  Vn Vd 1111 N 1 M 1 Vm  V{MIN,MAX}NM.F32 Qd,Qn,Qm
13788       A1: 1111 00110 D  op 0  Vn Vd 1111 N 1 M 1 Vm
13789
13790       T1: 1111 11110 D  op 0  Vn Vd 1111 N 0 M 1 Vm  V{MIN,MAX}NM.F32 Dd,Dn,Dm
13791       A1: 1111 00110 D  op 0  Vn Vd 1111 N 0 M 1 Vm
13792
13793       ARM encoding is in NV space.
13794       In Thumb mode, we must not be in an IT block.
13795    */
13796    if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,0)
13797                            : BITS9(1,1,1,1,0,0,1,1,0))
13798        && INSN(20,20) == 0 && INSN(11,8) == BITS4(1,1,1,1) && INSN(4,4) == 1) {
13799       UInt bit_D  = INSN(22,22);
13800       Bool isMax  = INSN(21,21) == 0;
13801       UInt fld_Vn = INSN(19,16);
13802       UInt fld_Vd = INSN(15,12);
13803       UInt bit_N  = INSN(7,7);
13804       Bool isQ    = INSN(6,6) == 1;
13805       UInt bit_M  = INSN(5,5);
13806       UInt fld_Vm = INSN(3,0);
13807
13808       /* dd, nn, mm are D-register numbers. */
13809       UInt dd = (bit_D << 4) | fld_Vd;
13810       UInt nn = (bit_N << 4) | fld_Vn;
13811       UInt mm = (bit_M << 4) | fld_Vm;
13812
13813       if (! (isQ && ((dd & 1) == 1 || (nn & 1) == 1 || (mm & 1) == 1))) {
13814          /* Do this piecewise on f regs.  This is a bit tricky
13815             though because we are dealing with the full 16 x Q == 32 x D
13816             register set, so the implied F reg numbers are 0 to 63.  But
13817             ll{Get,Put}FReg only allow the 0 .. 31 as those are the only
13818             architected F regs. */
13819          UInt ddF = dd << 1;
13820          UInt nnF = nn << 1;
13821          UInt mmF = mm << 1;
13822
13823          if (isT) {
13824             gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13825          }
13826          /* In ARM mode, this is statically unconditional.  In Thumb mode,
13827             this must be dynamically unconditional, and we've SIGILLd if not.
13828             In either case we can create unconditional IR. */
13829
13830          IROp op = isMax ? Iop_MaxNumF32 : Iop_MinNumF32;
13831
13832          IRTemp r0 = newTemp(Ity_F32);
13833          IRTemp r1 = newTemp(Ity_F32);
13834          IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13835          IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13836
13837          assign(r0, binop(op, llGetFReg_up_to_64(nnF+0),
13838                               llGetFReg_up_to_64(mmF+0)));
13839          assign(r1, binop(op, llGetFReg_up_to_64(nnF+1),
13840                               llGetFReg_up_to_64(mmF+1)));
13841          if (isQ) {
13842             assign(r2, binop(op, llGetFReg_up_to_64(nnF+2),
13843                                  llGetFReg_up_to_64(mmF+2)));
13844             assign(r3, binop(op, llGetFReg_up_to_64(nnF+3),
13845                                  llGetFReg_up_to_64(mmF+3)));
13846          }
13847          llPutFReg_up_to_64(ddF+0, mkexpr(r0));
13848          llPutFReg_up_to_64(ddF+1, mkexpr(r1));
13849          if (isQ) {
13850             llPutFReg_up_to_64(ddF+2, mkexpr(r2));
13851             llPutFReg_up_to_64(ddF+3, mkexpr(r3));
13852          }
13853
13854          HChar rch = isQ ? 'q' : 'd';
13855          UInt  sh  = isQ ? 1 : 0;
13856          DIP("v%snm.f32 %c%u, %c%u, %c%u\n",
13857               isMax ? "max" : "min", rch,
13858               dd >> sh, rch, nn >> sh, rch, mm >> sh);
13859          return True;
13860       }
13861       /* else fall through */
13862    }
13863
13864    /* ----------- VCVT{A,N,P,M}{.F32 d_d, .F32 q_q} ----------- */
13865    /*     31   27    22 21     15 11 9  7  6 5 4 3
13866       T1: 1111 11111 D  111011 Vd 00 rm op Q M 0 Vm
13867       A1: 1111 00111 D  111011 Vd 00 rm op Q M 0 Vm
13868
13869       ARM encoding is in NV space.
13870       In Thumb mode, we must not be in an IT block.
13871    */
13872    if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
13873                            : BITS9(1,1,1,1,0,0,1,1,1))
13874        && INSN(21,16) == BITS6(1,1,1,0,1,1) && INSN(11,10) == BITS2(0,0)
13875        && INSN(4,4) == 0) {
13876       UInt bit_D  = INSN(22,22);
13877       UInt fld_Vd = INSN(15,12);
13878       UInt fld_rm = INSN(9,8);
13879       Bool isU    = INSN(7,7) == 1;
13880       Bool isQ    = INSN(6,6) == 1;
13881       UInt bit_M  = INSN(5,5);
13882       UInt fld_Vm = INSN(3,0);
13883
13884       /* dd, nn, mm are D-register numbers. */
13885       UInt dd = (bit_D << 4) | fld_Vd;
13886       UInt mm = (bit_M << 4) | fld_Vm;
13887
13888       if (! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
13889          /* Do this piecewise on f regs. */
13890          UInt ddF = dd << 1;
13891          UInt mmF = mm << 1;
13892
13893          if (isT) {
13894             gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13895          }
13896          /* In ARM mode, this is statically unconditional.  In Thumb mode,
13897             this must be dynamically unconditional, and we've SIGILLd if not.
13898             In either case we can create unconditional IR. */
13899
13900          UChar cvtc = '?';
13901          IRRoundingMode rm = Irrm_NEAREST;
13902          switch (fld_rm) {
13903             /* The use of NEAREST for both the 'a' and 'n' cases is a bit of a
13904                kludge since it doesn't take into account the nearest-even vs
13905                nearest-away semantics. */
13906             case BITS2(0,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
13907             case BITS2(0,1): cvtc = 'n'; rm = Irrm_NEAREST; break;
13908             case BITS2(1,0): cvtc = 'p'; rm = Irrm_PosINF;  break;
13909             case BITS2(1,1): cvtc = 'm'; rm = Irrm_NegINF;  break;
13910             default: vassert(0);
13911          }
13912
13913          IROp cvt = isU ? Iop_F64toI32U : Iop_F64toI32S;
13914
13915          IRTemp r0 = newTemp(Ity_F32);
13916          IRTemp r1 = newTemp(Ity_F32);
13917          IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13918          IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
13919
13920          IRExpr* rmE = mkU32((UInt)rm);
13921
13922          assign(r0, unop(Iop_ReinterpI32asF32,
13923                          binop(cvt, rmE, unop(Iop_F32toF64,
13924                                               llGetFReg_up_to_64(mmF+0)))));
13925          assign(r1, unop(Iop_ReinterpI32asF32,
13926                          binop(cvt, rmE, unop(Iop_F32toF64,
13927                                               llGetFReg_up_to_64(mmF+1)))));
13928          if (isQ) {
13929             assign(r2, unop(Iop_ReinterpI32asF32,
13930                             binop(cvt, rmE, unop(Iop_F32toF64,
13931                                                  llGetFReg_up_to_64(mmF+2)))));
13932             assign(r3, unop(Iop_ReinterpI32asF32,
13933                             binop(cvt, rmE, unop(Iop_F32toF64,
13934                                                  llGetFReg_up_to_64(mmF+3)))));
13935          }
13936
13937          llPutFReg_up_to_64(ddF+0, mkexpr(r0));
13938          llPutFReg_up_to_64(ddF+1, mkexpr(r1));
13939          if (isQ) {
13940             llPutFReg_up_to_64(ddF+2, mkexpr(r2));
13941             llPutFReg_up_to_64(ddF+3, mkexpr(r3));
13942          }
13943
13944          HChar rch = isQ ? 'q' : 'd';
13945          UInt  sh  = isQ ? 1 : 0;
13946          DIP("vcvt%c.%c32.f32 %c%u, %c%u\n",
13947               cvtc, isU ? 'u' : 's', rch, dd >> sh, rch, mm >> sh);
13948          return True;
13949       }
13950       /* else fall through */
13951    }
13952
13953    /* ----------- VRINT{A,N,P,M,X,Z}{.F32 d_d, .F32 q_q} ----------- */
13954    /*     31   27    22 21     15 11 9  6 5 4 3
13955       T1: 1111 11111 D  111010 Vd 01 op Q M 0 Vm
13956       A1: 1111 00111 D  111010 Vd 01 op Q M 0 Vm
13957
13958       ARM encoding is in NV space.
13959       In Thumb mode, we must not be in an IT block.
13960    */
13961    if (INSN(31,23) == (isT ? BITS9(1,1,1,1,1,1,1,1,1)
13962                            : BITS9(1,1,1,1,0,0,1,1,1))
13963        && INSN(21,16) == BITS6(1,1,1,0,1,0) && INSN(11,10) == BITS2(0,1)
13964        && INSN(4,4) == 0) {
13965       UInt bit_D  = INSN(22,22);
13966       UInt fld_Vd = INSN(15,12);
13967       UInt fld_op = INSN(9,7);
13968       Bool isQ    = INSN(6,6) == 1;
13969       UInt bit_M  = INSN(5,5);
13970       UInt fld_Vm = INSN(3,0);
13971
13972       /* dd, nn, mm are D-register numbers. */
13973       UInt dd = (bit_D << 4) | fld_Vd;
13974       UInt mm = (bit_M << 4) | fld_Vm;
13975
13976       if (! (fld_op == BITS3(1,0,0) || fld_op == BITS3(1,1,0))
13977           && ! (isQ && ((dd & 1) == 1 || (mm & 1) == 1))) {
13978          /* Do this piecewise on f regs. */
13979          UInt ddF = dd << 1;
13980          UInt mmF = mm << 1;
13981
13982          if (isT) {
13983             gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
13984          }
13985          /* In ARM mode, this is statically unconditional.  In Thumb mode,
13986             this must be dynamically unconditional, and we've SIGILLd if not.
13987             In either case we can create unconditional IR. */
13988
13989          UChar cvtc = '?';
13990          IRRoundingMode rm = Irrm_NEAREST;
13991          switch (fld_op) {
13992             /* Various kludges:
13993                - The use of NEAREST for both the 'a' and 'n' cases,
13994                  since it doesn't take into account the nearest-even vs
13995                  nearest-away semantics.
13996                - For the 'x' case, we don't signal inexactness.
13997             */
13998             case BITS3(0,1,0): cvtc = 'a'; rm = Irrm_NEAREST; break;
13999             case BITS3(0,0,0): cvtc = 'n'; rm = Irrm_NEAREST; break;
14000             case BITS3(1,1,1): cvtc = 'p'; rm = Irrm_PosINF;  break;
14001             case BITS3(1,0,1): cvtc = 'm'; rm = Irrm_NegINF;  break;
14002             case BITS3(0,1,1): cvtc = 'z'; rm = Irrm_ZERO;    break;
14003             case BITS3(0,0,1): cvtc = 'x'; rm = Irrm_NEAREST; break;
14004             case BITS3(1,0,0):
14005             case BITS3(1,1,0):
14006             default: vassert(0);
14007          }
14008
14009          IRTemp r0 = newTemp(Ity_F32);
14010          IRTemp r1 = newTemp(Ity_F32);
14011          IRTemp r2 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
14012          IRTemp r3 = isQ ? newTemp(Ity_F32) : IRTemp_INVALID;
14013
14014          IRExpr* rmE = mkU32((UInt)rm);
14015          IROp    rnd = Iop_RoundF32toInt;
14016
14017          assign(r0, binop(rnd, rmE, llGetFReg_up_to_64(mmF+0)));
14018          assign(r1, binop(rnd, rmE, llGetFReg_up_to_64(mmF+1)));
14019          if (isQ) {
14020             assign(r2, binop(rnd, rmE, llGetFReg_up_to_64(mmF+2)));
14021             assign(r3, binop(rnd, rmE, llGetFReg_up_to_64(mmF+3)));
14022          }
14023
14024          llPutFReg_up_to_64(ddF+0, mkexpr(r0));
14025          llPutFReg_up_to_64(ddF+1, mkexpr(r1));
14026          if (isQ) {
14027             llPutFReg_up_to_64(ddF+2, mkexpr(r2));
14028             llPutFReg_up_to_64(ddF+3, mkexpr(r3));
14029          }
14030
14031          HChar rch = isQ ? 'q' : 'd';
14032          UInt  sh  = isQ ? 1 : 0;
14033          DIP("vrint%c.f32.f32 %c%u, %c%u\n",
14034              cvtc, rch, dd >> sh, rch, mm >> sh);
14035          return True;
14036       }
14037       /* else fall through */
14038    }
14039
14040    /* ---------- Doesn't match anything. ---------- */
14041    return False;
14042
14043 #  undef INSN
14044 }
14045
14046
14047 /*------------------------------------------------------------*/
14048 /*--- LDMxx/STMxx helper (both ARM and Thumb32)            ---*/
14049 /*------------------------------------------------------------*/
14050
14051 /* Generate IR for LDMxx and STMxx.  This is complex.  Assumes it's
14052    unconditional, so the caller must produce a jump-around before
14053    calling this, if the insn is to be conditional.  Caller is
14054    responsible for all validation of parameters.  For LDMxx, if PC is
14055    amongst the values loaded, caller is also responsible for
14056    generating the jump. */
14057 static void mk_ldm_stm ( Bool arm,     /* True: ARM, False: Thumb */
14058                          UInt rN,      /* base reg */
14059                          UInt bINC,    /* 1: inc,  0: dec */
14060                          UInt bBEFORE, /* 1: inc/dec before, 0: after */
14061                          UInt bW,      /* 1: writeback to Rn */
14062                          UInt bL,      /* 1: load, 0: store */
14063                          UInt regList )
14064 {
14065    Int i, r, m, nRegs;
14066    IRTemp jk = Ijk_Boring;
14067
14068    /* Get hold of the old Rn value.  We might need to write its value
14069       to memory during a store, and if it's also the writeback
14070       register then we need to get its value now.  We can't treat it
14071       exactly like the other registers we're going to transfer,
14072       because for xxMDA and xxMDB writeback forms, the generated IR
14073       updates Rn in the guest state before any transfers take place.
14074       We have to do this as per comments below, in order that if Rn is
14075       the stack pointer then it always has a value is below or equal
14076       to any of the transfer addresses.  Ick. */
14077    IRTemp oldRnT = newTemp(Ity_I32);
14078    assign(oldRnT, arm ? getIRegA(rN) : getIRegT(rN));
14079
14080    IRTemp anchorT = newTemp(Ity_I32);
14081    /* The old (Addison-Wesley) ARM ARM seems to say that LDMxx/STMxx
14082       ignore the bottom two bits of the address.  However, Cortex-A8
14083       doesn't seem to care.  Hence: */
14084    /* No .. don't force alignment .. */
14085    /* assign(anchorT, binop(Iop_And32, mkexpr(oldRnT), mkU32(~3U))); */
14086    /* Instead, use the potentially misaligned address directly. */
14087    assign(anchorT, mkexpr(oldRnT));
14088
14089    IROp opADDorSUB = bINC ? Iop_Add32 : Iop_Sub32;
14090    // bINC == 1:  xxMIA, xxMIB
14091    // bINC == 0:  xxMDA, xxMDB
14092
14093    // For xxMDA and xxMDB, update Rn first if necessary.  We have
14094    // to do this first so that, for the common idiom of the transfers
14095    // faulting because we're pushing stuff onto a stack and the stack
14096    // is growing down onto allocate-on-fault pages (as Valgrind simulates),
14097    // we need to have the SP up-to-date "covering" (pointing below) the
14098    // transfer area.  For the same reason, if we are doing xxMIA or xxMIB,
14099    // do the transfer first, and then update rN afterwards.
14100    nRegs = 0;
14101    for (i = 0; i < 16; i++) {
14102      if ((regList & (1 << i)) != 0)
14103          nRegs++;
14104    }
14105    if (bW == 1 && !bINC) {
14106       IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
14107       if (arm)
14108          putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
14109       else
14110          putIRegT( rN, e, IRTemp_INVALID );
14111    }
14112
14113    // Make up a list of the registers to transfer, and their offsets
14114    // in memory relative to the anchor.  If the base reg (Rn) is part
14115    // of the transfer, then do it last for a load and first for a store.
14116    UInt xReg[16], xOff[16];
14117    Int  nX = 0;
14118    m = 0;
14119    for (i = 0; i < 16; i++) {
14120       r = bINC ? i : (15-i);
14121       if (0 == (regList & (1<<r)))
14122          continue;
14123       if (bBEFORE)
14124          m++;
14125       /* paranoia: check we aren't transferring the writeback
14126          register during a load. Should be assured by decode-point
14127          check above. */
14128       if (bW == 1 && bL == 1)
14129          vassert(r != rN);
14130
14131       xOff[nX] = 4 * m;
14132       xReg[nX] = r;
14133       nX++;
14134
14135       if (!bBEFORE)
14136          m++;
14137    }
14138    vassert(m == nRegs);
14139    vassert(nX == nRegs);
14140    vassert(nX <= 16);
14141
14142    if (bW == 0 && (regList & (1<<rN)) != 0) {
14143       /* Non-writeback, and basereg is to be transferred.  Do its
14144          transfer last for a load and first for a store.  Requires
14145          reordering xOff/xReg. */
14146       if (0) {
14147          vex_printf("\nREG_LIST_PRE: (rN=%u)\n", rN);
14148          for (i = 0; i < nX; i++)
14149             vex_printf("reg %u   off %u\n", xReg[i], xOff[i]);
14150          vex_printf("\n");
14151       }
14152
14153       vassert(nX > 0);
14154       for (i = 0; i < nX; i++) {
14155          if (xReg[i] == rN)
14156              break;
14157       }
14158       vassert(i < nX); /* else we didn't find it! */
14159       UInt tReg = xReg[i];
14160       UInt tOff = xOff[i];
14161       if (bL == 1) {
14162          /* load; make this transfer happen last */
14163          if (i < nX-1) {
14164             for (m = i+1; m < nX; m++) {
14165                xReg[m-1] = xReg[m];
14166                xOff[m-1] = xOff[m];
14167             }
14168             vassert(m == nX);
14169             xReg[m-1] = tReg;
14170             xOff[m-1] = tOff;
14171          }
14172       } else {
14173          /* store; make this transfer happen first */
14174          if (i > 0) {
14175             for (m = i-1; m >= 0; m--) {
14176                xReg[m+1] = xReg[m];
14177                xOff[m+1] = xOff[m];
14178             }
14179             vassert(m == -1);
14180             xReg[0] = tReg;
14181             xOff[0] = tOff;
14182          }
14183       }
14184
14185       if (0) {
14186          vex_printf("REG_LIST_POST:\n");
14187          for (i = 0; i < nX; i++)
14188             vex_printf("reg %u   off %u\n", xReg[i], xOff[i]);
14189          vex_printf("\n");
14190       }
14191    }
14192
14193    /* According to the Cortex A8 TRM Sec. 5.2.1, LDM(1) with r13 as the base
14194        register and PC in the register list is a return for purposes of branch
14195        prediction.
14196       The ARM ARM Sec. C9.10.1 further specifies that writeback must be enabled
14197        to be counted in event 0x0E (Procedure return).*/
14198    if (rN == 13 && bL == 1 && bINC && !bBEFORE && bW == 1) {
14199       jk = Ijk_Ret;
14200    }
14201
14202    /* Actually generate the transfers */
14203    for (i = 0; i < nX; i++) {
14204       r = xReg[i];
14205       if (bL == 1) {
14206          IRExpr* e = loadLE(Ity_I32,
14207                             binop(opADDorSUB, mkexpr(anchorT),
14208                                   mkU32(xOff[i])));
14209          if (arm) {
14210             putIRegA( r, e, IRTemp_INVALID, jk );
14211          } else {
14212             // no: putIRegT( r, e, IRTemp_INVALID );
14213             // putIRegT refuses to write to R15.  But that might happen.
14214             // Since this is uncond, and we need to be able to
14215             // write the PC, just use the low level put:
14216             llPutIReg( r, e );
14217          }
14218       } else {
14219          /* if we're storing Rn, make sure we use the correct
14220             value, as per extensive comments above */
14221          storeLE( binop(opADDorSUB, mkexpr(anchorT), mkU32(xOff[i])),
14222                   r == rN ? mkexpr(oldRnT)
14223                           : (arm ? getIRegA(r) : getIRegT(r) ) );
14224       }
14225    }
14226
14227    // If we are doing xxMIA or xxMIB,
14228    // do the transfer first, and then update rN afterwards.
14229    if (bW == 1 && bINC) {
14230       IRExpr* e = binop(opADDorSUB, mkexpr(oldRnT), mkU32(4*nRegs));
14231       if (arm)
14232          putIRegA( rN, e, IRTemp_INVALID, Ijk_Boring );
14233       else
14234          putIRegT( rN, e, IRTemp_INVALID );
14235    }
14236 }
14237
14238
14239 /*------------------------------------------------------------*/
14240 /*--- VFP (CP 10 and 11) instructions                      ---*/
14241 /*------------------------------------------------------------*/
14242
14243 /* Both ARM and Thumb */
14244
14245 /* Translate a CP10 or CP11 instruction.  If successful, returns
14246    True and *dres may or may not be updated.  If failure, returns
14247    False and doesn't change *dres nor create any IR.
14248
14249    The ARM and Thumb encodings are identical for the low 28 bits of
14250    the insn (yay!) and that's what the caller must supply, iow, imm28
14251    has the top 4 bits masked out.  Caller is responsible for
14252    determining whether the masked-out bits are valid for a CP10/11
14253    insn.  The rules for the top 4 bits are:
14254
14255      ARM: 0000 to 1110 allowed, and this is the gating condition.
14256      1111 (NV) is not allowed.
14257
14258      Thumb: must be 1110.  The gating condition is taken from
14259      ITSTATE in the normal way.
14260
14261    Conditionalisation:
14262
14263    Caller must supply an IRTemp 'condT' holding the gating condition,
14264    or IRTemp_INVALID indicating the insn is always executed.
14265
14266    Caller must also supply an ARMCondcode 'cond'.  This is only used
14267    for debug printing, no other purpose.  For ARM, this is simply the
14268    top 4 bits of the original instruction.  For Thumb, the condition
14269    is not (really) known until run time, and so ARMCondAL should be
14270    passed, only so that printing of these instructions does not show
14271    any condition.
14272
14273    Finally, the caller must indicate whether this occurs in ARM or
14274    Thumb code.
14275 */
14276 static Bool decode_CP10_CP11_instruction (
14277                /*MOD*/DisResult* dres,
14278                UInt              insn28,
14279                IRTemp            condT,
14280                ARMCondcode       conq,
14281                Bool              isT
14282             )
14283 {
14284 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn28, (_bMax), (_bMin))
14285
14286    vassert(INSN(31,28) == BITS4(0,0,0,0)); // caller's obligation
14287
14288    if (isT) {
14289       vassert(conq == ARMCondAL);
14290    } else {
14291       vassert(conq >= ARMCondEQ && conq <= ARMCondAL);
14292    }
14293
14294    /* ----------------------------------------------------------- */
14295    /* -- VFP instructions -- double precision (mostly)         -- */
14296    /* ----------------------------------------------------------- */
14297
14298    /* --------------------- fldmx, fstmx --------------------- */
14299    /*
14300                                  31   27   23   19 15 11   7   0
14301                                          P U WL
14302       C4-100, C5-26  1  FSTMX    cond 1100 1000 Rn Dd 1011 offset
14303       C4-100, C5-28  2  FSTMIAX  cond 1100 1010 Rn Dd 1011 offset
14304       C4-100, C5-30  3  FSTMDBX  cond 1101 0010 Rn Dd 1011 offset
14305
14306       C4-42, C5-26   1  FLDMX    cond 1100 1001 Rn Dd 1011 offset
14307       C4-42, C5-28   2  FLDMIAX  cond 1100 1011 Rn Dd 1011 offset
14308       C4-42, C5-30   3  FLDMDBX  cond 1101 0011 Rn Dd 1011 offset
14309
14310       Regs transferred: Dd .. D(d + (offset-3)/2)
14311       offset must be odd, must not imply a reg > 15
14312       IA/DB: Rn is changed by (4 + 8 x # regs transferred)
14313
14314       case coding:
14315          1  at-Rn   (access at Rn)
14316          2  ia-Rn   (access at Rn, then Rn += 4+8n)
14317          3  db-Rn   (Rn -= 4+8n,   then access at Rn)
14318    */
14319    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
14320        && INSN(11,8) == BITS4(1,0,1,1)) {
14321       UInt bP      = (insn28 >> 24) & 1;
14322       UInt bU      = (insn28 >> 23) & 1;
14323       UInt bW      = (insn28 >> 21) & 1;
14324       UInt bL      = (insn28 >> 20) & 1;
14325       UInt offset  = (insn28 >> 0) & 0xFF;
14326       UInt rN      = INSN(19,16);
14327       UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
14328       UInt nRegs   = (offset - 1) / 2;
14329       UInt summary = 0;
14330       Int  i;
14331
14332       /**/ if (bP == 0 && bU == 1 && bW == 0) {
14333          summary = 1;
14334       }
14335       else if (bP == 0 && bU == 1 && bW == 1) {
14336          summary = 2;
14337       }
14338       else if (bP == 1 && bU == 0 && bW == 1) {
14339          summary = 3;
14340       }
14341       else goto after_vfp_fldmx_fstmx;
14342
14343       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
14344       if (rN == 15 && (summary == 2 || summary == 3 || isT))
14345          goto after_vfp_fldmx_fstmx;
14346
14347       /* offset must be odd, and specify at least one register */
14348       if (0 == (offset & 1) || offset < 3)
14349          goto after_vfp_fldmx_fstmx;
14350
14351       /* can't transfer regs after D15 */
14352       if (dD + nRegs - 1 >= 32)
14353          goto after_vfp_fldmx_fstmx;
14354
14355       /* Now, we can't do a conditional load or store, since that very
14356          likely will generate an exception.  So we have to take a side
14357          exit at this point if the condition is false. */
14358       if (condT != IRTemp_INVALID) {
14359          if (isT)
14360             mk_skip_over_T32_if_cond_is_false( condT );
14361          else
14362             mk_skip_over_A32_if_cond_is_false( condT );
14363          condT = IRTemp_INVALID;
14364       }
14365       /* Ok, now we're unconditional.  Do the load or store. */
14366
14367       /* get the old Rn value */
14368       IRTemp rnT = newTemp(Ity_I32);
14369       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
14370                            rN == 15));
14371
14372       /* make a new value for Rn, post-insn */
14373       IRTemp rnTnew = IRTemp_INVALID;
14374       if (summary == 2 || summary == 3) {
14375          rnTnew = newTemp(Ity_I32);
14376          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
14377                               mkexpr(rnT),
14378                               mkU32(4 + 8 * nRegs)));
14379       }
14380
14381       /* decide on the base transfer address */
14382       IRTemp taT = newTemp(Ity_I32);
14383       assign(taT,  summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
14384
14385       /* update Rn if necessary -- in case 3, we're moving it down, so
14386          update before any memory reference, in order to keep Memcheck
14387          and V's stack-extending logic (on linux) happy */
14388       if (summary == 3) {
14389          if (isT)
14390             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14391          else
14392             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14393       }
14394
14395       /* generate the transfers */
14396       for (i = 0; i < nRegs; i++) {
14397          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
14398          if (bL) {
14399             putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
14400          } else {
14401             storeLE(addr, getDReg(dD + i));
14402          }
14403       }
14404
14405       /* update Rn if necessary -- in case 2, we're moving it up, so
14406          update after any memory reference, in order to keep Memcheck
14407          and V's stack-extending logic (on linux) happy */
14408       if (summary == 2) {
14409          if (isT)
14410             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14411          else
14412             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14413       }
14414
14415       const HChar* nm = bL==1 ? "ld" : "st";
14416       switch (summary) {
14417          case 1:  DIP("f%smx%s r%u, {d%u-d%u}\n",
14418                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
14419                   break;
14420          case 2:  DIP("f%smiax%s r%u!, {d%u-d%u}\n",
14421                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
14422                   break;
14423          case 3:  DIP("f%smdbx%s r%u!, {d%u-d%u}\n",
14424                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
14425                   break;
14426          default: vassert(0);
14427       }
14428
14429       goto decode_success_vfp;
14430       /* FIXME alignment constraints? */
14431    }
14432
14433   after_vfp_fldmx_fstmx:
14434
14435    /* --------------------- fldmd, fstmd --------------------- */
14436    /*
14437                                  31   27   23   19 15 11   7   0
14438                                          P U WL
14439       C4-96, C5-26   1  FSTMD    cond 1100 1000 Rn Dd 1011 offset
14440       C4-96, C5-28   2  FSTMDIA  cond 1100 1010 Rn Dd 1011 offset
14441       C4-96, C5-30   3  FSTMDDB  cond 1101 0010 Rn Dd 1011 offset
14442
14443       C4-38, C5-26   1  FLDMD    cond 1100 1001 Rn Dd 1011 offset
14444       C4-38, C5-28   2  FLDMIAD  cond 1100 1011 Rn Dd 1011 offset
14445       C4-38, C5-30   3  FLDMDBD  cond 1101 0011 Rn Dd 1011 offset
14446
14447       Regs transferred: Dd .. D(d + (offset-2)/2)
14448       offset must be even, must not imply a reg > 15
14449       IA/DB: Rn is changed by (8 x # regs transferred)
14450
14451       case coding:
14452          1  at-Rn   (access at Rn)
14453          2  ia-Rn   (access at Rn, then Rn += 8n)
14454          3  db-Rn   (Rn -= 8n,     then access at Rn)
14455    */
14456    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
14457        && INSN(11,8) == BITS4(1,0,1,1)) {
14458       UInt bP      = (insn28 >> 24) & 1;
14459       UInt bU      = (insn28 >> 23) & 1;
14460       UInt bW      = (insn28 >> 21) & 1;
14461       UInt bL      = (insn28 >> 20) & 1;
14462       UInt offset  = (insn28 >> 0) & 0xFF;
14463       UInt rN      = INSN(19,16);
14464       UInt dD      = (INSN(22,22) << 4) | INSN(15,12);
14465       UInt nRegs   = offset / 2;
14466       UInt summary = 0;
14467       Int  i;
14468
14469       /**/ if (bP == 0 && bU == 1 && bW == 0) {
14470          summary = 1;
14471       }
14472       else if (bP == 0 && bU == 1 && bW == 1) {
14473          summary = 2;
14474       }
14475       else if (bP == 1 && bU == 0 && bW == 1) {
14476          summary = 3;
14477       }
14478       else goto after_vfp_fldmd_fstmd;
14479
14480       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
14481       if (rN == 15 && (summary == 2 || summary == 3 || isT))
14482          goto after_vfp_fldmd_fstmd;
14483
14484       /* offset must be even, and specify at least one register */
14485       if (1 == (offset & 1) || offset < 2)
14486          goto after_vfp_fldmd_fstmd;
14487
14488       /* can't transfer regs after D15 */
14489       if (dD + nRegs - 1 >= 32)
14490          goto after_vfp_fldmd_fstmd;
14491
14492       /* Now, we can't do a conditional load or store, since that very
14493          likely will generate an exception.  So we have to take a side
14494          exit at this point if the condition is false. */
14495       if (condT != IRTemp_INVALID) {
14496          if (isT)
14497             mk_skip_over_T32_if_cond_is_false( condT );
14498          else
14499             mk_skip_over_A32_if_cond_is_false( condT );
14500          condT = IRTemp_INVALID;
14501       }
14502       /* Ok, now we're unconditional.  Do the load or store. */
14503
14504       /* get the old Rn value */
14505       IRTemp rnT = newTemp(Ity_I32);
14506       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
14507                            rN == 15));
14508
14509       /* make a new value for Rn, post-insn */
14510       IRTemp rnTnew = IRTemp_INVALID;
14511       if (summary == 2 || summary == 3) {
14512          rnTnew = newTemp(Ity_I32);
14513          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
14514                               mkexpr(rnT),
14515                               mkU32(8 * nRegs)));
14516       }
14517
14518       /* decide on the base transfer address */
14519       IRTemp taT = newTemp(Ity_I32);
14520       assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
14521
14522       /* update Rn if necessary -- in case 3, we're moving it down, so
14523          update before any memory reference, in order to keep Memcheck
14524          and V's stack-extending logic (on linux) happy */
14525       if (summary == 3) {
14526          if (isT)
14527             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14528          else
14529             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14530       }
14531
14532       /* generate the transfers */
14533       for (i = 0; i < nRegs; i++) {
14534          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(8*i));
14535          if (bL) {
14536             putDReg(dD + i, loadLE(Ity_F64, addr), IRTemp_INVALID);
14537          } else {
14538             storeLE(addr, getDReg(dD + i));
14539          }
14540       }
14541
14542       /* update Rn if necessary -- in case 2, we're moving it up, so
14543          update after any memory reference, in order to keep Memcheck
14544          and V's stack-extending logic (on linux) happy */
14545       if (summary == 2) {
14546          if (isT)
14547             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
14548          else
14549             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
14550       }
14551
14552       const HChar* nm = bL==1 ? "ld" : "st";
14553       switch (summary) {
14554          case 1:  DIP("f%smd%s r%u, {d%u-d%u}\n",
14555                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
14556                   break;
14557          case 2:  DIP("f%smiad%s r%u!, {d%u-d%u}\n",
14558                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
14559                   break;
14560          case 3:  DIP("f%smdbd%s r%u!, {d%u-d%u}\n",
14561                       nm, nCC(conq), rN, dD, dD + nRegs - 1);
14562                   break;
14563          default: vassert(0);
14564       }
14565
14566       goto decode_success_vfp;
14567       /* FIXME alignment constraints? */
14568    }
14569
14570   after_vfp_fldmd_fstmd:
14571
14572    /* ------------------- fmrx, fmxr ------------------- */
14573    if (BITS8(1,1,1,0,1,1,1,1) == INSN(27,20)
14574        && BITS4(1,0,1,0) == INSN(11,8)
14575        && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
14576       UInt rD  = INSN(15,12);
14577       UInt reg = INSN(19,16);
14578       if (reg == BITS4(0,0,0,1)) {
14579          if (rD == 15) {
14580             IRTemp nzcvT = newTemp(Ity_I32);
14581             /* When rD is 15, we are copying the top 4 bits of FPSCR
14582                into CPSR.  That is, set the flags thunk to COPY and
14583                install FPSCR[31:28] as the value to copy. */
14584             assign(nzcvT, binop(Iop_And32,
14585                                 IRExpr_Get(OFFB_FPSCR, Ity_I32),
14586                                 mkU32(0xF0000000)));
14587             setFlags_D1(ARMG_CC_OP_COPY, nzcvT, condT);
14588             DIP("fmstat%s\n", nCC(conq));
14589          } else {
14590             /* Otherwise, merely transfer FPSCR to r0 .. r14. */
14591             IRExpr* e = IRExpr_Get(OFFB_FPSCR, Ity_I32);
14592             if (isT)
14593                putIRegT(rD, e, condT);
14594             else
14595                putIRegA(rD, e, condT, Ijk_Boring);
14596             DIP("fmrx%s r%u, fpscr\n", nCC(conq), rD);
14597          }
14598          goto decode_success_vfp;
14599       }
14600       /* fall through */
14601    }
14602
14603    if (BITS8(1,1,1,0,1,1,1,0) == INSN(27,20)
14604        && BITS4(1,0,1,0) == INSN(11,8)
14605        && BITS8(0,0,0,1,0,0,0,0) == (insn28 & 0xFF)) {
14606       UInt rD  = INSN(15,12);
14607       UInt reg = INSN(19,16);
14608       if (reg == BITS4(0,0,0,1)) {
14609          putMiscReg32(OFFB_FPSCR,
14610                       isT ? getIRegT(rD) : getIRegA(rD), condT);
14611          DIP("fmxr%s fpscr, r%u\n", nCC(conq), rD);
14612          goto decode_success_vfp;
14613       }
14614       /* fall through */
14615    }
14616
14617    /* --------------------- vmov --------------------- */
14618    // VMOV dM, rD, rN
14619    if (0x0C400B10 == (insn28 & 0x0FF00FD0)) {
14620       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
14621       UInt rD = INSN(15,12); /* lo32 */
14622       UInt rN = INSN(19,16); /* hi32 */
14623       if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))) {
14624          /* fall through */
14625       } else {
14626          putDReg(dM,
14627                  unop(Iop_ReinterpI64asF64,
14628                       binop(Iop_32HLto64,
14629                             isT ? getIRegT(rN) : getIRegA(rN),
14630                             isT ? getIRegT(rD) : getIRegA(rD))),
14631                  condT);
14632          DIP("vmov%s d%u, r%u, r%u\n", nCC(conq), dM, rD, rN);
14633          goto decode_success_vfp;
14634       }
14635       /* fall through */
14636    }
14637
14638    // VMOV rD, rN, dM
14639    if (0x0C500B10 == (insn28 & 0x0FF00FD0)) {
14640       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
14641       UInt rD = INSN(15,12); /* lo32 */
14642       UInt rN = INSN(19,16); /* hi32 */
14643       if (rD == 15 || rN == 15 || (isT && (rD == 13 || rN == 13))
14644           || rD == rN) {
14645          /* fall through */
14646       } else {
14647          IRTemp i64 = newTemp(Ity_I64);
14648          assign(i64, unop(Iop_ReinterpF64asI64, getDReg(dM)));
14649          IRExpr* hi32 = unop(Iop_64HIto32, mkexpr(i64));
14650          IRExpr* lo32 = unop(Iop_64to32,   mkexpr(i64));
14651          if (isT) {
14652             putIRegT(rN, hi32, condT);
14653             putIRegT(rD, lo32, condT);
14654          } else {
14655             putIRegA(rN, hi32, condT, Ijk_Boring);
14656             putIRegA(rD, lo32, condT, Ijk_Boring);
14657          }
14658          DIP("vmov%s r%u, r%u, d%u\n", nCC(conq), rD, rN, dM);
14659          goto decode_success_vfp;
14660       }
14661       /* fall through */
14662    }
14663
14664    // VMOV sD, sD+1, rN, rM
14665    if (0x0C400A10 == (insn28 & 0x0FF00FD0)) {
14666       UInt sD = (INSN(3,0) << 1) | INSN(5,5);
14667       UInt rN = INSN(15,12);
14668       UInt rM = INSN(19,16);
14669       if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
14670           || sD == 31) {
14671          /* fall through */
14672       } else {
14673          putFReg(sD,
14674                  unop(Iop_ReinterpI32asF32, isT ? getIRegT(rN) : getIRegA(rN)),
14675                  condT);
14676          putFReg(sD+1,
14677                  unop(Iop_ReinterpI32asF32, isT ? getIRegT(rM) : getIRegA(rM)),
14678                  condT);
14679          DIP("vmov%s, s%u, s%u, r%u, r%u\n",
14680               nCC(conq), sD, sD + 1, rN, rM);
14681          goto decode_success_vfp;
14682       }
14683    }
14684
14685    // VMOV rN, rM, sD, sD+1
14686    if (0x0C500A10 == (insn28 & 0x0FF00FD0)) {
14687       UInt sD = (INSN(3,0) << 1) | INSN(5,5);
14688       UInt rN = INSN(15,12);
14689       UInt rM = INSN(19,16);
14690       if (rM == 15 || rN == 15 || (isT && (rM == 13 || rN == 13))
14691           || sD == 31 || rN == rM) {
14692          /* fall through */
14693       } else {
14694          IRExpr* res0 = unop(Iop_ReinterpF32asI32, getFReg(sD));
14695          IRExpr* res1 = unop(Iop_ReinterpF32asI32, getFReg(sD+1));
14696          if (isT) {
14697             putIRegT(rN, res0, condT);
14698             putIRegT(rM, res1, condT);
14699          } else {
14700             putIRegA(rN, res0, condT, Ijk_Boring);
14701             putIRegA(rM, res1, condT, Ijk_Boring);
14702          }
14703          DIP("vmov%s, r%u, r%u, s%u, s%u\n",
14704              nCC(conq), rN, rM, sD, sD + 1);
14705          goto decode_success_vfp;
14706       }
14707    }
14708
14709    // VMOV rD[x], rT  (ARM core register to scalar)
14710    if (0x0E000B10 == (insn28 & 0x0F900F1F)) {
14711       UInt rD  = (INSN(7,7) << 4) | INSN(19,16);
14712       UInt rT  = INSN(15,12);
14713       UInt opc = (INSN(22,21) << 2) | INSN(6,5);
14714       UInt index;
14715       if (rT == 15 || (isT && rT == 13)) {
14716          /* fall through */
14717       } else {
14718          if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
14719             index = opc & 7;
14720             putDRegI64(rD, triop(Iop_SetElem8x8,
14721                                  getDRegI64(rD),
14722                                  mkU8(index),
14723                                  unop(Iop_32to8,
14724                                       isT ? getIRegT(rT) : getIRegA(rT))),
14725                            condT);
14726             DIP("vmov%s.8 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14727             goto decode_success_vfp;
14728          }
14729          else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
14730             index = (opc >> 1) & 3;
14731             putDRegI64(rD, triop(Iop_SetElem16x4,
14732                                  getDRegI64(rD),
14733                                  mkU8(index),
14734                                  unop(Iop_32to16,
14735                                       isT ? getIRegT(rT) : getIRegA(rT))),
14736                            condT);
14737             DIP("vmov%s.16 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14738             goto decode_success_vfp;
14739          }
14740          else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0)) {
14741             index = (opc >> 2) & 1;
14742             putDRegI64(rD, triop(Iop_SetElem32x2,
14743                                  getDRegI64(rD),
14744                                  mkU8(index),
14745                                  isT ? getIRegT(rT) : getIRegA(rT)),
14746                            condT);
14747             DIP("vmov%s.32 d%u[%u], r%u\n", nCC(conq), rD, index, rT);
14748             goto decode_success_vfp;
14749          } else {
14750             /* fall through */
14751          }
14752       }
14753    }
14754
14755    // VMOV (scalar to ARM core register)
14756    // VMOV rT, rD[x]
14757    if (0x0E100B10 == (insn28 & 0x0F100F1F)) {
14758       UInt rN  = (INSN(7,7) << 4) | INSN(19,16);
14759       UInt rT  = INSN(15,12);
14760       UInt U   = INSN(23,23);
14761       UInt opc = (INSN(22,21) << 2) | INSN(6,5);
14762       UInt index;
14763       if (rT == 15 || (isT && rT == 13)) {
14764          /* fall through */
14765       } else {
14766          if ((opc & BITS4(1,0,0,0)) == BITS4(1,0,0,0)) {
14767             index = opc & 7;
14768             IRExpr* e = unop(U ? Iop_8Uto32 : Iop_8Sto32,
14769                              binop(Iop_GetElem8x8,
14770                                    getDRegI64(rN),
14771                                    mkU8(index)));
14772             if (isT)
14773                putIRegT(rT, e, condT);
14774             else
14775                putIRegA(rT, e, condT, Ijk_Boring);
14776             DIP("vmov%s.%c8 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
14777                   rT, rN, index);
14778             goto decode_success_vfp;
14779          }
14780          else if ((opc & BITS4(1,0,0,1)) == BITS4(0,0,0,1)) {
14781             index = (opc >> 1) & 3;
14782             IRExpr* e = unop(U ? Iop_16Uto32 : Iop_16Sto32,
14783                              binop(Iop_GetElem16x4,
14784                                    getDRegI64(rN),
14785                                    mkU8(index)));
14786             if (isT)
14787                putIRegT(rT, e, condT);
14788             else
14789                putIRegA(rT, e, condT, Ijk_Boring);
14790             DIP("vmov%s.%c16 r%u, d%u[%u]\n", nCC(conq), U ? 'u' : 's',
14791                   rT, rN, index);
14792             goto decode_success_vfp;
14793          }
14794          else if ((opc & BITS4(1,0,1,1)) == BITS4(0,0,0,0) && U == 0) {
14795             index = (opc >> 2) & 1;
14796             IRExpr* e = binop(Iop_GetElem32x2, getDRegI64(rN), mkU8(index));
14797             if (isT)
14798                putIRegT(rT, e, condT);
14799             else
14800                putIRegA(rT, e, condT, Ijk_Boring);
14801             DIP("vmov%s.32 r%u, d%u[%u]\n", nCC(conq), rT, rN, index);
14802             goto decode_success_vfp;
14803          } else {
14804             /* fall through */
14805          }
14806       }
14807    }
14808
14809    // VMOV.F32 sD, #imm
14810    // FCONSTS sD, #imm
14811    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14812        && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,0)) {
14813       UInt rD   = (INSN(15,12) << 1) | INSN(22,22);
14814       UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
14815       UInt b    = (imm8 >> 6) & 1;
14816       UInt imm;
14817       imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,(imm8 >> 5) & 1) << 8)
14818              | ((imm8 & 0x1f) << 3);
14819       imm <<= 16;
14820       putFReg(rD, unop(Iop_ReinterpI32asF32, mkU32(imm)), condT);
14821       DIP("fconsts%s s%u #%u", nCC(conq), rD, imm8);
14822       goto decode_success_vfp;
14823    }
14824
14825    // VMOV.F64 dD, #imm
14826    // FCONSTD dD, #imm
14827    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
14828        && BITS4(0,0,0,0) == INSN(7,4) && INSN(11,8) == BITS4(1,0,1,1)) {
14829       UInt rD   = INSN(15,12) | (INSN(22,22) << 4);
14830       UInt imm8 = (INSN(19,16) << 4) | INSN(3,0);
14831       UInt b    = (imm8 >> 6) & 1;
14832       ULong imm;
14833       imm = (BITS8((imm8 >> 7) & 1,(~b) & 1,b,b,b,b,b,b) << 8)
14834              | BITS8(b,b,0,0,0,0,0,0) | (imm8 & 0x3f);
14835       imm <<= 48;
14836       putDReg(rD, unop(Iop_ReinterpI64asF64, mkU64(imm)), condT);
14837       DIP("fconstd%s d%u #%u", nCC(conq), rD, imm8);
14838       goto decode_success_vfp;
14839    }
14840
14841    /* ---------------------- vdup ------------------------- */
14842    // VDUP dD, rT
14843    // VDUP qD, rT
14844    if (BITS8(1,1,1,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,1))
14845        && BITS4(1,0,1,1) == INSN(11,8) && INSN(6,6) == 0 && INSN(4,4) == 1) {
14846       UInt rD   = (INSN(7,7) << 4) | INSN(19,16);
14847       UInt rT   = INSN(15,12);
14848       UInt Q    = INSN(21,21);
14849       UInt size = (INSN(22,22) << 1) | INSN(5,5);
14850       if (rT == 15 || (isT && rT == 13) || size == 3 || (Q && (rD & 1))) {
14851          /* fall through */
14852       } else {
14853          IRExpr* e = isT ? getIRegT(rT) : getIRegA(rT);
14854          if (Q) {
14855             rD >>= 1;
14856             switch (size) {
14857                case 0:
14858                   putQReg(rD, unop(Iop_Dup32x4, e), condT);
14859                   break;
14860                case 1:
14861                   putQReg(rD, unop(Iop_Dup16x8, unop(Iop_32to16, e)),
14862                               condT);
14863                   break;
14864                case 2:
14865                   putQReg(rD, unop(Iop_Dup8x16, unop(Iop_32to8, e)),
14866                               condT);
14867                   break;
14868                default:
14869                   vassert(0);
14870             }
14871             DIP("vdup.%d q%u, r%u\n", 32 / (1<<size), rD, rT);
14872          } else {
14873             switch (size) {
14874                case 0:
14875                   putDRegI64(rD, unop(Iop_Dup32x2, e), condT);
14876                   break;
14877                case 1:
14878                   putDRegI64(rD, unop(Iop_Dup16x4, unop(Iop_32to16, e)),
14879                                condT);
14880                   break;
14881                case 2:
14882                   putDRegI64(rD, unop(Iop_Dup8x8, unop(Iop_32to8, e)),
14883                                condT);
14884                   break;
14885                default:
14886                   vassert(0);
14887             }
14888             DIP("vdup.%d d%u, r%u\n", 32 / (1<<size), rD, rT);
14889          }
14890          goto decode_success_vfp;
14891       }
14892    }
14893
14894    /* --------------------- f{ld,st}d --------------------- */
14895    // FLDD, FSTD
14896    if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
14897        && BITS4(1,0,1,1) == INSN(11,8)) {
14898       UInt dD     = INSN(15,12) | (INSN(22,22) << 4);
14899       UInt rN     = INSN(19,16);
14900       UInt offset = (insn28 & 0xFF) << 2;
14901       UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
14902       UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
14903       /* make unconditional */
14904       if (condT != IRTemp_INVALID) {
14905          if (isT)
14906             mk_skip_over_T32_if_cond_is_false( condT );
14907          else
14908             mk_skip_over_A32_if_cond_is_false( condT );
14909          condT = IRTemp_INVALID;
14910       }
14911       IRTemp ea = newTemp(Ity_I32);
14912       assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
14913                        align4if(isT ? getIRegT(rN) : getIRegA(rN),
14914                                 rN == 15),
14915                        mkU32(offset)));
14916       if (bL) {
14917          putDReg(dD, loadLE(Ity_F64,mkexpr(ea)), IRTemp_INVALID);
14918       } else {
14919          storeLE(mkexpr(ea), getDReg(dD));
14920       }
14921       DIP("f%sd%s d%u, [r%u, %c#%u]\n",
14922           bL ? "ld" : "st", nCC(conq), dD, rN,
14923           bU ? '+' : '-', offset);
14924       goto decode_success_vfp;
14925    }
14926
14927    /* --------------------- dp insns (D) --------------------- */
14928    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
14929        && BITS4(1,0,1,1) == INSN(11,8)
14930        && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
14931       UInt    dM  = INSN(3,0)   | (INSN(5,5) << 4);       /* argR */
14932       UInt    dD  = INSN(15,12) | (INSN(22,22) << 4);   /* dst/acc */
14933       UInt    dN  = INSN(19,16) | (INSN(7,7) << 4);     /* argL */
14934       UInt    bP  = (insn28 >> 23) & 1;
14935       UInt    bQ  = (insn28 >> 21) & 1;
14936       UInt    bR  = (insn28 >> 20) & 1;
14937       UInt    bS  = (insn28 >> 6) & 1;
14938       UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
14939       IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
14940       switch (opc) {
14941          case BITS4(0,0,0,0): /* MAC: d + n * m */
14942             putDReg(dD, triop(Iop_AddF64, rm,
14943                               getDReg(dD),
14944                               triop(Iop_MulF64, rm, getDReg(dN),
14945                                                     getDReg(dM))),
14946                         condT);
14947             DIP("fmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14948             goto decode_success_vfp;
14949          case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
14950             putDReg(dD, triop(Iop_AddF64, rm,
14951                               getDReg(dD),
14952                               unop(Iop_NegF64,
14953                                    triop(Iop_MulF64, rm, getDReg(dN),
14954                                                          getDReg(dM)))),
14955                         condT);
14956             DIP("fnmacd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14957             goto decode_success_vfp;
14958          case BITS4(0,0,1,0): /* MSC: - d + n * m */
14959             putDReg(dD, triop(Iop_AddF64, rm,
14960                               unop(Iop_NegF64, getDReg(dD)),
14961                               triop(Iop_MulF64, rm, getDReg(dN),
14962                                                     getDReg(dM))),
14963                         condT);
14964             DIP("fmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14965             goto decode_success_vfp;
14966          case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
14967             putDReg(dD, triop(Iop_AddF64, rm,
14968                               unop(Iop_NegF64, getDReg(dD)),
14969                               unop(Iop_NegF64,
14970                                    triop(Iop_MulF64, rm, getDReg(dN),
14971                                                          getDReg(dM)))),
14972                         condT);
14973             DIP("fnmscd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14974             goto decode_success_vfp;
14975          case BITS4(0,1,0,0): /* MUL: n * m */
14976             putDReg(dD, triop(Iop_MulF64, rm, getDReg(dN), getDReg(dM)),
14977                         condT);
14978             DIP("fmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14979             goto decode_success_vfp;
14980          case BITS4(0,1,0,1): /* NMUL: - n * m */
14981             putDReg(dD, unop(Iop_NegF64,
14982                              triop(Iop_MulF64, rm, getDReg(dN),
14983                                                    getDReg(dM))),
14984                     condT);
14985             DIP("fnmuld%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14986             goto decode_success_vfp;
14987          case BITS4(0,1,1,0): /* ADD: n + m */
14988             putDReg(dD, triop(Iop_AddF64, rm, getDReg(dN), getDReg(dM)),
14989                         condT);
14990             DIP("faddd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14991             goto decode_success_vfp;
14992          case BITS4(0,1,1,1): /* SUB: n - m */
14993             putDReg(dD, triop(Iop_SubF64, rm, getDReg(dN), getDReg(dM)),
14994                         condT);
14995             DIP("fsubd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
14996             goto decode_success_vfp;
14997          case BITS4(1,0,0,0): /* DIV: n / m */
14998             putDReg(dD, triop(Iop_DivF64, rm, getDReg(dN), getDReg(dM)),
14999                         condT);
15000             DIP("fdivd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15001             goto decode_success_vfp;
15002          case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
15003             /* XXXROUNDINGFIXME look up ARM reference for fused
15004                multiply-add rounding */
15005             putDReg(dD, triop(Iop_AddF64, rm,
15006                               unop(Iop_NegF64, getDReg(dD)),
15007                               triop(Iop_MulF64, rm,
15008                                                 getDReg(dN),
15009                                                 getDReg(dM))),
15010                         condT);
15011             DIP("vfnmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15012             goto decode_success_vfp;
15013          case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
15014             /* XXXROUNDINGFIXME look up ARM reference for fused
15015                multiply-add rounding */
15016             putDReg(dD, triop(Iop_AddF64, rm,
15017                               unop(Iop_NegF64, getDReg(dD)),
15018                               triop(Iop_MulF64, rm,
15019                                                 unop(Iop_NegF64, getDReg(dN)),
15020                                                 getDReg(dM))),
15021                         condT);
15022             DIP("vfnmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15023             goto decode_success_vfp;
15024          case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
15025             /* XXXROUNDINGFIXME look up ARM reference for fused
15026                multiply-add rounding */
15027             putDReg(dD, triop(Iop_AddF64, rm,
15028                               getDReg(dD),
15029                               triop(Iop_MulF64, rm, getDReg(dN),
15030                                                     getDReg(dM))),
15031                         condT);
15032             DIP("vfmad%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15033             goto decode_success_vfp;
15034          case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
15035             /* XXXROUNDINGFIXME look up ARM reference for fused
15036                multiply-add rounding */
15037             putDReg(dD, triop(Iop_AddF64, rm,
15038                               getDReg(dD),
15039                               triop(Iop_MulF64, rm,
15040                                     unop(Iop_NegF64, getDReg(dN)),
15041                                     getDReg(dM))),
15042                         condT);
15043             DIP("vfmsd%s d%u, d%u, d%u\n", nCC(conq), dD, dN, dM);
15044             goto decode_success_vfp;
15045          default:
15046             break;
15047       }
15048    }
15049
15050    /* --------------------- compares (D) --------------------- */
15051    /*          31   27   23   19   15 11   7    3
15052                  28   24   20   16 12    8    4    0
15053       FCMPD    cond 1110 1D11 0100 Dd 1011 0100 Dm
15054       FCMPED   cond 1110 1D11 0100 Dd 1011 1100 Dm
15055       FCMPZD   cond 1110 1D11 0101 Dd 1011 0100 0000
15056       FCMPZED  cond 1110 1D11 0101 Dd 1011 1100 0000
15057                                  Z         N
15058
15059       Z=0 Compare Dd vs Dm     and set FPSCR 31:28 accordingly
15060       Z=1 Compare Dd vs zero
15061
15062       N=1 generates Invalid Operation exn if either arg is any kind of NaN
15063       N=0 generates Invalid Operation exn if either arg is a signalling NaN
15064       (Not that we pay any attention to N here)
15065    */
15066    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15067        && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15068        && BITS4(1,0,1,1) == INSN(11,8)
15069        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15070       UInt bZ = (insn28 >> 16) & 1;
15071       UInt bN = (insn28 >> 7) & 1;
15072       UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15073       UInt dM = INSN(3,0) | (INSN(5,5) << 4);
15074       if (bZ && INSN(3,0) != 0) {
15075          /* does not decode; fall through */
15076       } else {
15077          IRTemp argL = newTemp(Ity_F64);
15078          IRTemp argR = newTemp(Ity_F64);
15079          IRTemp irRes = newTemp(Ity_I32);
15080          assign(argL, getDReg(dD));
15081          assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0)) : getDReg(dM));
15082          assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
15083
15084          IRTemp nzcv     = IRTemp_INVALID;
15085          IRTemp oldFPSCR = newTemp(Ity_I32);
15086          IRTemp newFPSCR = newTemp(Ity_I32);
15087
15088          /* This is where the fun starts.  We have to convert 'irRes'
15089             from an IR-convention return result (IRCmpF64Result) to an
15090             ARM-encoded (N,Z,C,V) group.  The final result is in the
15091             bottom 4 bits of 'nzcv'. */
15092          /* Map compare result from IR to ARM(nzcv) */
15093          /*
15094             FP cmp result | IR   | ARM(nzcv)
15095             --------------------------------
15096             UN              0x45   0011
15097             LT              0x01   1000
15098             GT              0x00   0010
15099             EQ              0x40   0110
15100          */
15101          nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15102
15103          /* And update FPSCR accordingly */
15104          assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
15105          assign(newFPSCR,
15106                 binop(Iop_Or32,
15107                       binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
15108                       binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
15109
15110          putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
15111
15112          if (bZ) {
15113             DIP("fcmpz%sd%s d%u\n", bN ? "e" : "", nCC(conq), dD);
15114          } else {
15115             DIP("fcmp%sd%s d%u, d%u\n", bN ? "e" : "", nCC(conq), dD, dM);
15116          }
15117          goto decode_success_vfp;
15118       }
15119       /* fall through */
15120    }
15121
15122    /* --------------------- unary (D) --------------------- */
15123    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15124        && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15125        && BITS4(1,0,1,1) == INSN(11,8)
15126        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15127       UInt dD  = INSN(15,12) | (INSN(22,22) << 4);
15128       UInt dM  = INSN(3,0) | (INSN(5,5) << 4);
15129       UInt b16 = (insn28 >> 16) & 1;
15130       UInt b7  = (insn28 >> 7) & 1;
15131       /**/ if (b16 == 0 && b7 == 0) {
15132          // FCPYD
15133          putDReg(dD, getDReg(dM), condT);
15134          DIP("fcpyd%s d%u, d%u\n", nCC(conq), dD, dM);
15135          goto decode_success_vfp;
15136       }
15137       else if (b16 == 0 && b7 == 1) {
15138          // FABSD
15139          putDReg(dD, unop(Iop_AbsF64, getDReg(dM)), condT);
15140          DIP("fabsd%s d%u, d%u\n", nCC(conq), dD, dM);
15141          goto decode_success_vfp;
15142       }
15143       else if (b16 == 1 && b7 == 0) {
15144          // FNEGD
15145          putDReg(dD, unop(Iop_NegF64, getDReg(dM)), condT);
15146          DIP("fnegd%s d%u, d%u\n", nCC(conq), dD, dM);
15147          goto decode_success_vfp;
15148       }
15149       else if (b16 == 1 && b7 == 1) {
15150          // FSQRTD
15151          IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15152          putDReg(dD, binop(Iop_SqrtF64, rm, getDReg(dM)), condT);
15153          DIP("fsqrtd%s d%u, d%u\n", nCC(conq), dD, dM);
15154          goto decode_success_vfp;
15155       }
15156       else
15157          vassert(0);
15158
15159       /* fall through */
15160    }
15161
15162    /* ----------------- I <-> D conversions ----------------- */
15163
15164    // F{S,U}ITOD dD, fM
15165    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15166        && BITS4(1,0,0,0) == (INSN(19,16) & BITS4(1,1,1,1))
15167        && BITS4(1,0,1,1) == INSN(11,8)
15168        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15169       UInt bM    = (insn28 >> 5) & 1;
15170       UInt fM    = (INSN(3,0) << 1) | bM;
15171       UInt dD    = INSN(15,12) | (INSN(22,22) << 4);
15172       UInt syned = (insn28 >> 7) & 1;
15173       if (syned) {
15174          // FSITOD
15175          putDReg(dD, unop(Iop_I32StoF64,
15176                           unop(Iop_ReinterpF32asI32, getFReg(fM))),
15177                  condT);
15178          DIP("fsitod%s d%u, s%u\n", nCC(conq), dD, fM);
15179       } else {
15180          // FUITOD
15181          putDReg(dD, unop(Iop_I32UtoF64,
15182                           unop(Iop_ReinterpF32asI32, getFReg(fM))),
15183                  condT);
15184          DIP("fuitod%s d%u, s%u\n", nCC(conq), dD, fM);
15185       }
15186       goto decode_success_vfp;
15187    }
15188
15189    // FTO{S,U}ID fD, dM
15190    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15191        && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15192        && BITS4(1,0,1,1) == INSN(11,8)
15193        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15194       UInt   bD    = (insn28 >> 22) & 1;
15195       UInt   fD    = (INSN(15,12) << 1) | bD;
15196       UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
15197       UInt   bZ    = (insn28 >> 7) & 1;
15198       UInt   syned = (insn28 >> 16) & 1;
15199       IRTemp rmode = newTemp(Ity_I32);
15200       assign(rmode, bZ ? mkU32(Irrm_ZERO)
15201                        : mkexpr(mk_get_IR_rounding_mode()));
15202       if (syned) {
15203          // FTOSID
15204          putFReg(fD, unop(Iop_ReinterpI32asF32,
15205                           binop(Iop_F64toI32S, mkexpr(rmode),
15206                                 getDReg(dM))),
15207                  condT);
15208          DIP("ftosi%sd%s s%u, d%u\n", bZ ? "z" : "",
15209              nCC(conq), fD, dM);
15210       } else {
15211          // FTOUID
15212          putFReg(fD, unop(Iop_ReinterpI32asF32,
15213                           binop(Iop_F64toI32U, mkexpr(rmode),
15214                                 getDReg(dM))),
15215                  condT);
15216          DIP("ftoui%sd%s s%u, d%u\n", bZ ? "z" : "",
15217              nCC(conq), fD, dM);
15218       }
15219       goto decode_success_vfp;
15220    }
15221
15222    /* ----------------------------------------------------------- */
15223    /* -- VFP instructions -- single precision                  -- */
15224    /* ----------------------------------------------------------- */
15225
15226    /* --------------------- fldms, fstms --------------------- */
15227    /*
15228                                  31   27   23   19 15 11   7   0
15229                                          P UDWL
15230       C4-98, C5-26   1  FSTMD    cond 1100 1x00 Rn Fd 1010 offset
15231       C4-98, C5-28   2  FSTMDIA  cond 1100 1x10 Rn Fd 1010 offset
15232       C4-98, C5-30   3  FSTMDDB  cond 1101 0x10 Rn Fd 1010 offset
15233
15234       C4-40, C5-26   1  FLDMD    cond 1100 1x01 Rn Fd 1010 offset
15235       C4-40, C5-26   2  FLDMIAD  cond 1100 1x11 Rn Fd 1010 offset
15236       C4-40, C5-26   3  FLDMDBD  cond 1101 0x11 Rn Fd 1010 offset
15237
15238       Regs transferred: F(Fd:D) .. F(Fd:d + offset)
15239       offset must not imply a reg > 15
15240       IA/DB: Rn is changed by (4 x # regs transferred)
15241
15242       case coding:
15243          1  at-Rn   (access at Rn)
15244          2  ia-Rn   (access at Rn, then Rn += 4n)
15245          3  db-Rn   (Rn -= 4n,     then access at Rn)
15246    */
15247    if (BITS8(1,1,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))
15248        && INSN(11,8) == BITS4(1,0,1,0)) {
15249       UInt bP      = (insn28 >> 24) & 1;
15250       UInt bU      = (insn28 >> 23) & 1;
15251       UInt bW      = (insn28 >> 21) & 1;
15252       UInt bL      = (insn28 >> 20) & 1;
15253       UInt bD      = (insn28 >> 22) & 1;
15254       UInt offset  = (insn28 >> 0) & 0xFF;
15255       UInt rN      = INSN(19,16);
15256       UInt fD      = (INSN(15,12) << 1) | bD;
15257       UInt nRegs   = offset;
15258       UInt summary = 0;
15259       Int  i;
15260
15261       /**/ if (bP == 0 && bU == 1 && bW == 0) {
15262          summary = 1;
15263       }
15264       else if (bP == 0 && bU == 1 && bW == 1) {
15265          summary = 2;
15266       }
15267       else if (bP == 1 && bU == 0 && bW == 1) {
15268          summary = 3;
15269       }
15270       else goto after_vfp_fldms_fstms;
15271
15272       /* no writebacks to r15 allowed.  No use of r15 in thumb mode. */
15273       if (rN == 15 && (summary == 2 || summary == 3 || isT))
15274          goto after_vfp_fldms_fstms;
15275
15276       /* offset must specify at least one register */
15277       if (offset < 1)
15278          goto after_vfp_fldms_fstms;
15279
15280       /* can't transfer regs after S31 */
15281       if (fD + nRegs - 1 >= 32)
15282          goto after_vfp_fldms_fstms;
15283
15284       /* Now, we can't do a conditional load or store, since that very
15285          likely will generate an exception.  So we have to take a side
15286          exit at this point if the condition is false. */
15287       if (condT != IRTemp_INVALID) {
15288          if (isT)
15289             mk_skip_over_T32_if_cond_is_false( condT );
15290          else
15291             mk_skip_over_A32_if_cond_is_false( condT );
15292          condT = IRTemp_INVALID;
15293       }
15294       /* Ok, now we're unconditional.  Do the load or store. */
15295
15296       /* get the old Rn value */
15297       IRTemp rnT = newTemp(Ity_I32);
15298       assign(rnT, align4if(isT ? getIRegT(rN) : getIRegA(rN),
15299                            rN == 15));
15300
15301       /* make a new value for Rn, post-insn */
15302       IRTemp rnTnew = IRTemp_INVALID;
15303       if (summary == 2 || summary == 3) {
15304          rnTnew = newTemp(Ity_I32);
15305          assign(rnTnew, binop(summary == 2 ? Iop_Add32 : Iop_Sub32,
15306                               mkexpr(rnT),
15307                               mkU32(4 * nRegs)));
15308       }
15309
15310       /* decide on the base transfer address */
15311       IRTemp taT = newTemp(Ity_I32);
15312       assign(taT, summary == 3 ? mkexpr(rnTnew) : mkexpr(rnT));
15313
15314       /* update Rn if necessary -- in case 3, we're moving it down, so
15315          update before any memory reference, in order to keep Memcheck
15316          and V's stack-extending logic (on linux) happy */
15317       if (summary == 3) {
15318          if (isT)
15319             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
15320          else
15321             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
15322       }
15323
15324       /* generate the transfers */
15325       for (i = 0; i < nRegs; i++) {
15326          IRExpr* addr = binop(Iop_Add32, mkexpr(taT), mkU32(4*i));
15327          if (bL) {
15328             putFReg(fD + i, loadLE(Ity_F32, addr), IRTemp_INVALID);
15329          } else {
15330             storeLE(addr, getFReg(fD + i));
15331          }
15332       }
15333
15334       /* update Rn if necessary -- in case 2, we're moving it up, so
15335          update after any memory reference, in order to keep Memcheck
15336          and V's stack-extending logic (on linux) happy */
15337       if (summary == 2) {
15338          if (isT)
15339             putIRegT(rN, mkexpr(rnTnew), IRTemp_INVALID);
15340          else
15341             putIRegA(rN, mkexpr(rnTnew), IRTemp_INVALID, Ijk_Boring);
15342       }
15343
15344       const HChar* nm = bL==1 ? "ld" : "st";
15345       switch (summary) {
15346          case 1:  DIP("f%sms%s r%u, {s%u-s%u}\n",
15347                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
15348                   break;
15349          case 2:  DIP("f%smias%s r%u!, {s%u-s%u}\n",
15350                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
15351                   break;
15352          case 3:  DIP("f%smdbs%s r%u!, {s%u-s%u}\n",
15353                       nm, nCC(conq), rN, fD, fD + nRegs - 1);
15354                   break;
15355          default: vassert(0);
15356       }
15357
15358       goto decode_success_vfp;
15359       /* FIXME alignment constraints? */
15360    }
15361
15362   after_vfp_fldms_fstms:
15363
15364    /* --------------------- fmsr, fmrs --------------------- */
15365    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
15366        && BITS4(1,0,1,0) == INSN(11,8)
15367        && BITS4(0,0,0,0) == INSN(3,0)
15368        && BITS4(0,0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
15369       UInt rD  = INSN(15,12);
15370       UInt b7  = (insn28 >> 7) & 1;
15371       UInt fN  = (INSN(19,16) << 1) | b7;
15372       UInt b20 = (insn28 >> 20) & 1;
15373       if (rD == 15) {
15374          /* fall through */
15375          /* Let's assume that no sane person would want to do
15376             floating-point transfers to or from the program counter,
15377             and simply decline to decode the instruction.  The ARM ARM
15378             doesn't seem to explicitly disallow this case, though. */
15379       } else {
15380          if (b20) {
15381             IRExpr* res = unop(Iop_ReinterpF32asI32, getFReg(fN));
15382             if (isT)
15383                putIRegT(rD, res, condT);
15384             else
15385                putIRegA(rD, res, condT, Ijk_Boring);
15386             DIP("fmrs%s r%u, s%u\n", nCC(conq), rD, fN);
15387          } else {
15388             putFReg(fN, unop(Iop_ReinterpI32asF32,
15389                              isT ? getIRegT(rD) : getIRegA(rD)),
15390                         condT);
15391             DIP("fmsr%s s%u, r%u\n", nCC(conq), fN, rD);
15392          }
15393          goto decode_success_vfp;
15394       }
15395       /* fall through */
15396    }
15397
15398    /* --------------------- f{ld,st}s --------------------- */
15399    // FLDS, FSTS
15400    if (BITS8(1,1,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,1,0))
15401        && BITS4(1,0,1,0) == INSN(11,8)) {
15402       UInt bD     = (insn28 >> 22) & 1;
15403       UInt fD     = (INSN(15,12) << 1) | bD;
15404       UInt rN     = INSN(19,16);
15405       UInt offset = (insn28 & 0xFF) << 2;
15406       UInt bU     = (insn28 >> 23) & 1; /* 1: +offset  0: -offset */
15407       UInt bL     = (insn28 >> 20) & 1; /* 1: load  0: store */
15408       /* make unconditional */
15409       if (condT != IRTemp_INVALID) {
15410          if (isT)
15411             mk_skip_over_T32_if_cond_is_false( condT );
15412          else
15413             mk_skip_over_A32_if_cond_is_false( condT );
15414          condT = IRTemp_INVALID;
15415       }
15416       IRTemp ea = newTemp(Ity_I32);
15417       assign(ea, binop(bU ? Iop_Add32 : Iop_Sub32,
15418                        align4if(isT ? getIRegT(rN) : getIRegA(rN),
15419                                 rN == 15),
15420                        mkU32(offset)));
15421       if (bL) {
15422          putFReg(fD, loadLE(Ity_F32,mkexpr(ea)), IRTemp_INVALID);
15423       } else {
15424          storeLE(mkexpr(ea), getFReg(fD));
15425       }
15426       DIP("f%ss%s s%u, [r%u, %c#%u]\n",
15427           bL ? "ld" : "st", nCC(conq), fD, rN,
15428           bU ? '+' : '-', offset);
15429       goto decode_success_vfp;
15430    }
15431
15432    /* --------------------- dp insns (F) --------------------- */
15433    if (BITS8(1,1,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))
15434        && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
15435        && BITS4(0,0,0,0) == (INSN(7,4) & BITS4(0,0,0,1))) {
15436       UInt    bM  = (insn28 >> 5) & 1;
15437       UInt    bD  = (insn28 >> 22) & 1;
15438       UInt    bN  = (insn28 >> 7) & 1;
15439       UInt    fM  = (INSN(3,0) << 1) | bM;   /* argR */
15440       UInt    fD  = (INSN(15,12) << 1) | bD; /* dst/acc */
15441       UInt    fN  = (INSN(19,16) << 1) | bN; /* argL */
15442       UInt    bP  = (insn28 >> 23) & 1;
15443       UInt    bQ  = (insn28 >> 21) & 1;
15444       UInt    bR  = (insn28 >> 20) & 1;
15445       UInt    bS  = (insn28 >> 6) & 1;
15446       UInt    opc = (bP << 3) | (bQ << 2) | (bR << 1) | bS;
15447       IRExpr* rm  = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15448       switch (opc) {
15449          case BITS4(0,0,0,0): /* MAC: d + n * m */
15450             putFReg(fD, triop(Iop_AddF32, rm,
15451                               getFReg(fD),
15452                               triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
15453                         condT);
15454             DIP("fmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15455             goto decode_success_vfp;
15456          case BITS4(0,0,0,1): /* NMAC: d + -(n * m) */
15457             putFReg(fD, triop(Iop_AddF32, rm,
15458                               getFReg(fD),
15459                               unop(Iop_NegF32,
15460                                    triop(Iop_MulF32, rm, getFReg(fN),
15461                                                          getFReg(fM)))),
15462                         condT);
15463             DIP("fnmacs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15464             goto decode_success_vfp;
15465          case BITS4(0,0,1,0): /* MSC: - d + n * m */
15466             putFReg(fD, triop(Iop_AddF32, rm,
15467                               unop(Iop_NegF32, getFReg(fD)),
15468                               triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM))),
15469                         condT);
15470             DIP("fmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15471             goto decode_success_vfp;
15472          case BITS4(0,0,1,1): /* NMSC: - d + -(n * m) */
15473             putFReg(fD, triop(Iop_AddF32, rm,
15474                               unop(Iop_NegF32, getFReg(fD)),
15475                               unop(Iop_NegF32,
15476                                    triop(Iop_MulF32, rm,
15477                                                      getFReg(fN),
15478                                                     getFReg(fM)))),
15479                         condT);
15480             DIP("fnmscs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15481             goto decode_success_vfp;
15482          case BITS4(0,1,0,0): /* MUL: n * m */
15483             putFReg(fD, triop(Iop_MulF32, rm, getFReg(fN), getFReg(fM)),
15484                         condT);
15485             DIP("fmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15486             goto decode_success_vfp;
15487          case BITS4(0,1,0,1): /* NMUL: - n * m */
15488             putFReg(fD, unop(Iop_NegF32,
15489                              triop(Iop_MulF32, rm, getFReg(fN),
15490                                                    getFReg(fM))),
15491                     condT);
15492             DIP("fnmuls%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15493             goto decode_success_vfp;
15494          case BITS4(0,1,1,0): /* ADD: n + m */
15495             putFReg(fD, triop(Iop_AddF32, rm, getFReg(fN), getFReg(fM)),
15496                         condT);
15497             DIP("fadds%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15498             goto decode_success_vfp;
15499          case BITS4(0,1,1,1): /* SUB: n - m */
15500             putFReg(fD, triop(Iop_SubF32, rm, getFReg(fN), getFReg(fM)),
15501                         condT);
15502             DIP("fsubs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15503             goto decode_success_vfp;
15504          case BITS4(1,0,0,0): /* DIV: n / m */
15505             putFReg(fD, triop(Iop_DivF32, rm, getFReg(fN), getFReg(fM)),
15506                         condT);
15507             DIP("fdivs%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15508             goto decode_success_vfp;
15509          case BITS4(1,0,1,0): /* VNFMS: -(d - n * m) (fused) */
15510             /* XXXROUNDINGFIXME look up ARM reference for fused
15511                multiply-add rounding */
15512             putFReg(fD, triop(Iop_AddF32, rm,
15513                               unop(Iop_NegF32, getFReg(fD)),
15514                               triop(Iop_MulF32, rm,
15515                                                 getFReg(fN),
15516                                                 getFReg(fM))),
15517                         condT);
15518             DIP("vfnmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15519             goto decode_success_vfp;
15520          case BITS4(1,0,1,1): /* VNFMA: -(d + n * m) (fused) */
15521             /* XXXROUNDINGFIXME look up ARM reference for fused
15522                multiply-add rounding */
15523             putFReg(fD, triop(Iop_AddF32, rm,
15524                               unop(Iop_NegF32, getFReg(fD)),
15525                               triop(Iop_MulF32, rm,
15526                                                 unop(Iop_NegF32, getFReg(fN)),
15527                                                 getFReg(fM))),
15528                         condT);
15529             DIP("vfnmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15530             goto decode_success_vfp;
15531          case BITS4(1,1,0,0): /* VFMA: d + n * m (fused) */
15532             /* XXXROUNDINGFIXME look up ARM reference for fused
15533                multiply-add rounding */
15534             putFReg(fD, triop(Iop_AddF32, rm,
15535                               getFReg(fD),
15536                               triop(Iop_MulF32, rm, getFReg(fN),
15537                                                     getFReg(fM))),
15538                         condT);
15539             DIP("vfmas%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15540             goto decode_success_vfp;
15541          case BITS4(1,1,0,1): /* VFMS: d + (-n * m) (fused) */
15542             /* XXXROUNDINGFIXME look up ARM reference for fused
15543                multiply-add rounding */
15544             putFReg(fD, triop(Iop_AddF32, rm,
15545                               getFReg(fD),
15546                               triop(Iop_MulF32, rm,
15547                                     unop(Iop_NegF32, getFReg(fN)),
15548                                     getFReg(fM))),
15549                         condT);
15550             DIP("vfmss%s s%u, s%u, s%u\n", nCC(conq), fD, fN, fM);
15551             goto decode_success_vfp;
15552          default:
15553             break;
15554       }
15555    }
15556
15557    /* --------------------- compares (S) --------------------- */
15558    /*          31   27   23   19   15 11   7    3
15559                  28   24   20   16 12    8    4    0
15560       FCMPS    cond 1110 1D11 0100 Fd 1010 01M0 Fm
15561       FCMPES   cond 1110 1D11 0100 Fd 1010 11M0 Fm
15562       FCMPZS   cond 1110 1D11 0101 Fd 1010 0100 0000
15563       FCMPZED  cond 1110 1D11 0101 Fd 1010 1100 0000
15564                                  Z         N
15565
15566       Z=0 Compare Fd:D vs Fm:M     and set FPSCR 31:28 accordingly
15567       Z=1 Compare Fd:D vs zero
15568
15569       N=1 generates Invalid Operation exn if either arg is any kind of NaN
15570       N=0 generates Invalid Operation exn if either arg is a signalling NaN
15571       (Not that we pay any attention to N here)
15572    */
15573    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15574        && BITS4(0,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15575        && BITS4(1,0,1,0) == INSN(11,8)
15576        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15577       UInt bZ = (insn28 >> 16) & 1;
15578       UInt bN = (insn28 >> 7) & 1;
15579       UInt bD = (insn28 >> 22) & 1;
15580       UInt bM = (insn28 >> 5) & 1;
15581       UInt fD = (INSN(15,12) << 1) | bD;
15582       UInt fM = (INSN(3,0) << 1) | bM;
15583       if (bZ && (INSN(3,0) != 0 || (INSN(7,4) & 3) != 0)) {
15584          /* does not decode; fall through */
15585       } else {
15586          IRTemp argL = newTemp(Ity_F64);
15587          IRTemp argR = newTemp(Ity_F64);
15588          IRTemp irRes = newTemp(Ity_I32);
15589
15590          assign(argL, unop(Iop_F32toF64, getFReg(fD)));
15591          assign(argR, bZ ? IRExpr_Const(IRConst_F64i(0))
15592                          : unop(Iop_F32toF64, getFReg(fM)));
15593          assign(irRes, binop(Iop_CmpF64, mkexpr(argL), mkexpr(argR)));
15594
15595          IRTemp nzcv     = IRTemp_INVALID;
15596          IRTemp oldFPSCR = newTemp(Ity_I32);
15597          IRTemp newFPSCR = newTemp(Ity_I32);
15598
15599          /* This is where the fun starts.  We have to convert 'irRes'
15600             from an IR-convention return result (IRCmpF64Result) to an
15601             ARM-encoded (N,Z,C,V) group.  The final result is in the
15602             bottom 4 bits of 'nzcv'. */
15603          /* Map compare result from IR to ARM(nzcv) */
15604          /*
15605             FP cmp result | IR   | ARM(nzcv)
15606             --------------------------------
15607             UN              0x45   0011
15608             LT              0x01   1000
15609             GT              0x00   0010
15610             EQ              0x40   0110
15611          */
15612          nzcv = mk_convert_IRCmpF64Result_to_NZCV(irRes);
15613
15614          /* And update FPSCR accordingly */
15615          assign(oldFPSCR, IRExpr_Get(OFFB_FPSCR, Ity_I32));
15616          assign(newFPSCR,
15617                 binop(Iop_Or32,
15618                       binop(Iop_And32, mkexpr(oldFPSCR), mkU32(0x0FFFFFFF)),
15619                       binop(Iop_Shl32, mkexpr(nzcv), mkU8(28))));
15620
15621          putMiscReg32(OFFB_FPSCR, mkexpr(newFPSCR), condT);
15622
15623          if (bZ) {
15624             DIP("fcmpz%ss%s s%u\n", bN ? "e" : "", nCC(conq), fD);
15625          } else {
15626             DIP("fcmp%ss%s s%u, s%u\n", bN ? "e" : "",
15627                 nCC(conq), fD, fM);
15628          }
15629          goto decode_success_vfp;
15630       }
15631       /* fall through */
15632    }
15633
15634    /* --------------------- unary (S) --------------------- */
15635    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15636        && BITS4(0,0,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15637        && BITS4(1,0,1,0) == INSN(11,8)
15638        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15639       UInt bD = (insn28 >> 22) & 1;
15640       UInt bM = (insn28 >> 5) & 1;
15641       UInt fD  = (INSN(15,12) << 1) | bD;
15642       UInt fM  = (INSN(3,0) << 1) | bM;
15643       UInt b16 = (insn28 >> 16) & 1;
15644       UInt b7  = (insn28 >> 7) & 1;
15645       /**/ if (b16 == 0 && b7 == 0) {
15646          // FCPYS
15647          putFReg(fD, getFReg(fM), condT);
15648          DIP("fcpys%s s%u, s%u\n", nCC(conq), fD, fM);
15649          goto decode_success_vfp;
15650       }
15651       else if (b16 == 0 && b7 == 1) {
15652          // FABSS
15653          putFReg(fD, unop(Iop_AbsF32, getFReg(fM)), condT);
15654          DIP("fabss%s s%u, s%u\n", nCC(conq), fD, fM);
15655          goto decode_success_vfp;
15656       }
15657       else if (b16 == 1 && b7 == 0) {
15658          // FNEGS
15659          putFReg(fD, unop(Iop_NegF32, getFReg(fM)), condT);
15660          DIP("fnegs%s s%u, s%u\n", nCC(conq), fD, fM);
15661          goto decode_success_vfp;
15662       }
15663       else if (b16 == 1 && b7 == 1) {
15664          // FSQRTS
15665          IRExpr* rm = get_FAKE_roundingmode(); /* XXXROUNDINGFIXME */
15666          putFReg(fD, binop(Iop_SqrtF32, rm, getFReg(fM)), condT);
15667          DIP("fsqrts%s s%u, s%u\n", nCC(conq), fD, fM);
15668          goto decode_success_vfp;
15669       }
15670       else
15671          vassert(0);
15672
15673       /* fall through */
15674    }
15675
15676    /* ----------------- I <-> S conversions ----------------- */
15677
15678    // F{S,U}ITOS fD, fM
15679    /* These are more complex than FSITOD/FUITOD.  In the D cases, a 32
15680       bit int will always fit within the 53 bit mantissa, so there's
15681       no possibility of a loss of precision, but that's obviously not
15682       the case here.  Hence this case possibly requires rounding, and
15683       so it drags in the current rounding mode. */
15684    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15685        && BITS4(1,0,0,0) == INSN(19,16)
15686        && BITS4(1,0,1,0) == (INSN(11,8) & BITS4(1,1,1,0))
15687        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15688       UInt bM    = (insn28 >> 5) & 1;
15689       UInt bD    = (insn28 >> 22) & 1;
15690       UInt fM    = (INSN(3,0) << 1) | bM;
15691       UInt fD    = (INSN(15,12) << 1) | bD;
15692       UInt syned = (insn28 >> 7) & 1;
15693       IRTemp rmode = newTemp(Ity_I32);
15694       assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
15695       if (syned) {
15696          // FSITOS
15697          putFReg(fD, binop(Iop_F64toF32,
15698                            mkexpr(rmode),
15699                            unop(Iop_I32StoF64,
15700                                 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
15701                  condT);
15702          DIP("fsitos%s s%u, s%u\n", nCC(conq), fD, fM);
15703       } else {
15704          // FUITOS
15705          putFReg(fD, binop(Iop_F64toF32,
15706                            mkexpr(rmode),
15707                            unop(Iop_I32UtoF64,
15708                                 unop(Iop_ReinterpF32asI32, getFReg(fM)))),
15709                  condT);
15710          DIP("fuitos%s s%u, s%u\n", nCC(conq), fD, fM);
15711       }
15712       goto decode_success_vfp;
15713    }
15714
15715    // FTO{S,U}IS fD, fM
15716    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15717        && BITS4(1,1,0,0) == (INSN(19,16) & BITS4(1,1,1,0))
15718        && BITS4(1,0,1,0) == INSN(11,8)
15719        && BITS4(0,1,0,0) == (INSN(7,4) & BITS4(0,1,0,1))) {
15720       UInt   bM    = (insn28 >> 5) & 1;
15721       UInt   bD    = (insn28 >> 22) & 1;
15722       UInt   fD    = (INSN(15,12) << 1) | bD;
15723       UInt   fM    = (INSN(3,0) << 1) | bM;
15724       UInt   bZ    = (insn28 >> 7) & 1;
15725       UInt   syned = (insn28 >> 16) & 1;
15726       IRTemp rmode = newTemp(Ity_I32);
15727       assign(rmode, bZ ? mkU32(Irrm_ZERO)
15728                        : mkexpr(mk_get_IR_rounding_mode()));
15729       if (syned) {
15730          // FTOSIS
15731          putFReg(fD, unop(Iop_ReinterpI32asF32,
15732                           binop(Iop_F64toI32S, mkexpr(rmode),
15733                                 unop(Iop_F32toF64, getFReg(fM)))),
15734                  condT);
15735          DIP("ftosi%ss%s s%u, d%u\n", bZ ? "z" : "",
15736              nCC(conq), fD, fM);
15737          goto decode_success_vfp;
15738       } else {
15739          // FTOUIS
15740          putFReg(fD, unop(Iop_ReinterpI32asF32,
15741                           binop(Iop_F64toI32U, mkexpr(rmode),
15742                                 unop(Iop_F32toF64, getFReg(fM)))),
15743                  condT);
15744          DIP("ftoui%ss%s s%u, d%u\n", bZ ? "z" : "",
15745              nCC(conq), fD, fM);
15746          goto decode_success_vfp;
15747       }
15748    }
15749
15750    /* ----------------- S <-> D conversions ----------------- */
15751
15752    // FCVTDS
15753    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15754        && BITS4(0,1,1,1) == INSN(19,16)
15755        && BITS4(1,0,1,0) == INSN(11,8)
15756        && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
15757       UInt dD = INSN(15,12) | (INSN(22,22) << 4);
15758       UInt bM = (insn28 >> 5) & 1;
15759       UInt fM = (INSN(3,0) << 1) | bM;
15760       putDReg(dD, unop(Iop_F32toF64, getFReg(fM)), condT);
15761       DIP("fcvtds%s d%u, s%u\n", nCC(conq), dD, fM);
15762       goto decode_success_vfp;
15763    }
15764
15765    // FCVTSD
15766    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15767        && BITS4(0,1,1,1) == INSN(19,16)
15768        && BITS4(1,0,1,1) == INSN(11,8)
15769        && BITS4(1,1,0,0) == (INSN(7,4) & BITS4(1,1,0,1))) {
15770       UInt   bD    = (insn28 >> 22) & 1;
15771       UInt   fD    = (INSN(15,12) << 1) | bD;
15772       UInt   dM    = INSN(3,0) | (INSN(5,5) << 4);
15773       IRTemp rmode = newTemp(Ity_I32);
15774       assign(rmode, mkexpr(mk_get_IR_rounding_mode()));
15775       putFReg(fD, binop(Iop_F64toF32, mkexpr(rmode), getDReg(dM)),
15776                   condT);
15777       DIP("fcvtsd%s s%u, d%u\n", nCC(conq), fD, dM);
15778       goto decode_success_vfp;
15779    }
15780
15781    /* --------------- VCVT fixed<->floating, VFP --------------- */
15782    /*          31   27   23   19   15 11   7    3
15783                  28   24   20   16 12    8    4    0
15784
15785                cond 1110 1D11 1p1U Vd 101f x1i0 imm4
15786
15787       VCVT<c>.<Td>.F64 <Dd>, <Dd>, #fbits
15788       VCVT<c>.<Td>.F32 <Dd>, <Dd>, #fbits
15789       VCVT<c>.F64.<Td> <Dd>, <Dd>, #fbits
15790       VCVT<c>.F32.<Td> <Dd>, <Dd>, #fbits
15791       are of this form.  We only handle a subset of the cases though.
15792    */
15793    if (BITS8(1,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
15794        && BITS4(1,0,1,0) == (INSN(19,16) & BITS4(1,0,1,0))
15795        && BITS3(1,0,1) == INSN(11,9)
15796        && BITS3(1,0,0) == (INSN(6,4) & BITS3(1,0,1))) {
15797       UInt bD        = INSN(22,22);
15798       UInt bOP       = INSN(18,18);
15799       UInt bU        = INSN(16,16);
15800       UInt Vd        = INSN(15,12);
15801       UInt bSF       = INSN(8,8);
15802       UInt bSX       = INSN(7,7);
15803       UInt bI        = INSN(5,5);
15804       UInt imm4      = INSN(3,0);
15805       Bool to_fixed  = bOP == 1;
15806       Bool dp_op     = bSF == 1;
15807       Bool unsyned   = bU == 1;
15808       UInt size      = bSX == 0 ? 16 : 32;
15809       Int  frac_bits = size - ((imm4 << 1) | bI);
15810       UInt d         = dp_op  ? ((bD << 4) | Vd)  : ((Vd << 1) | bD);
15811
15812       IRExpr* rm     = mkU32(Irrm_NEAREST);
15813       IRTemp  scale  = newTemp(Ity_F64);
15814       assign(scale, unop(Iop_I32UtoF64, mkU32( ((UInt)1) << (frac_bits-1) )));
15815
15816       if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && !dp_op
15817                                             && size == 32) {
15818          /* VCVT.F32.{S,U}32 S[d], S[d], #frac_bits */
15819          /* This generates really horrible code.  We could potentially
15820             do much better. */
15821          IRTemp rmode = newTemp(Ity_I32);
15822          assign(rmode, mkU32(Irrm_NEAREST)); // per the spec
15823          IRTemp src32 = newTemp(Ity_I32);
15824          assign(src32,  unop(Iop_ReinterpF32asI32, getFReg(d)));
15825          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
15826                                 mkexpr(src32 ) );
15827          IRExpr* resF64 = triop(Iop_DivF64,
15828                                 rm, as_F64,
15829                                 triop(Iop_AddF64, rm, mkexpr(scale),
15830                                                       mkexpr(scale)));
15831          IRExpr* resF32 = binop(Iop_F64toF32, mkexpr(rmode), resF64);
15832          putFReg(d, resF32, condT);
15833          DIP("vcvt.f32.%c32, s%u, s%u, #%d\n",
15834              unsyned ? 'u' : 's', d, d, frac_bits);
15835          goto decode_success_vfp;
15836       }
15837       if (frac_bits >= 1 && frac_bits <= 32 && !to_fixed && dp_op
15838                                             && size == 32) {
15839          /* VCVT.F64.{S,U}32 D[d], D[d], #frac_bits */
15840          /* This generates really horrible code.  We could potentially
15841             do much better. */
15842          IRTemp src32 = newTemp(Ity_I32);
15843          assign(src32, unop(Iop_64to32, getDRegI64(d)));
15844          IRExpr* as_F64 = unop( unsyned ? Iop_I32UtoF64 : Iop_I32StoF64,
15845                                 mkexpr(src32 ) );
15846          IRExpr* resF64 = triop(Iop_DivF64,
15847                                 rm, as_F64,
15848                                 triop(Iop_AddF64, rm, mkexpr(scale),
15849                                                       mkexpr(scale)));
15850          putDReg(d, resF64, condT);
15851          DIP("vcvt.f64.%c32, d%u, d%u, #%d\n",
15852              unsyned ? 'u' : 's', d, d, frac_bits);
15853          goto decode_success_vfp;
15854       }
15855       if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && dp_op
15856                                             && size == 32) {
15857          /* VCVT.{S,U}32.F64 D[d], D[d], #frac_bits */
15858          IRTemp srcF64 = newTemp(Ity_F64);
15859          assign(srcF64, getDReg(d));
15860          IRTemp scaledF64 = newTemp(Ity_F64);
15861          assign(scaledF64, triop(Iop_MulF64,
15862                                  rm, mkexpr(srcF64),
15863                                  triop(Iop_AddF64, rm, mkexpr(scale),
15864                                                        mkexpr(scale))));
15865          IRTemp rmode = newTemp(Ity_I32);
15866          assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
15867          IRTemp asI32 = newTemp(Ity_I32);
15868          assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
15869                              mkexpr(rmode), mkexpr(scaledF64)));
15870          putDRegI64(d, unop(unsyned ? Iop_32Uto64 : Iop_32Sto64,
15871                             mkexpr(asI32)), condT);
15872
15873          DIP("vcvt.%c32.f64, d%u, d%u, #%d\n",
15874              unsyned ? 'u' : 's', d, d, frac_bits);
15875          goto decode_success_vfp;
15876       }
15877       if (frac_bits >= 1 && frac_bits <= 32 && to_fixed && !dp_op
15878                                             && size == 32) {
15879          /* VCVT.{S,U}32.F32 S[d], S[d], #frac_bits */
15880          IRTemp srcF32 = newTemp(Ity_F32);
15881          assign(srcF32, getFReg(d));
15882          IRTemp scaledF64 = newTemp(Ity_F64);
15883          assign(scaledF64, triop(Iop_MulF64,
15884                                  rm, unop(Iop_F32toF64, mkexpr(srcF32)),
15885                                  triop(Iop_AddF64, rm, mkexpr(scale),
15886                                                        mkexpr(scale))));
15887          IRTemp rmode = newTemp(Ity_I32);
15888          assign(rmode, mkU32(Irrm_ZERO)); // as per the spec
15889          IRTemp asI32 = newTemp(Ity_I32);
15890          assign(asI32, binop(unsyned ? Iop_F64toI32U : Iop_F64toI32S,
15891                              mkexpr(rmode), mkexpr(scaledF64)));
15892          putFReg(d, unop(Iop_ReinterpI32asF32, mkexpr(asI32)), condT);
15893          DIP("vcvt.%c32.f32, d%u, d%u, #%d\n",
15894              unsyned ? 'u' : 's', d, d, frac_bits);
15895          goto decode_success_vfp;
15896       }
15897       /* fall through */
15898    }
15899
15900    /* FAILURE */
15901    return False;
15902
15903   decode_success_vfp:
15904    /* Check that any accepted insn really is a CP10 or CP11 insn, iow,
15905       assert that we aren't accepting, in this fn, insns that actually
15906       should be handled somewhere else. */
15907    vassert(INSN(11,9) == BITS3(1,0,1)); // 11:8 = 1010 or 1011
15908    return True;
15909
15910 #  undef INSN
15911 }
15912
15913
15914 /*------------------------------------------------------------*/
15915 /*--- Instructions in NV (never) space                     ---*/
15916 /*------------------------------------------------------------*/
15917
15918 /* ARM only */
15919 /* Translate a NV space instruction.  If successful, returns True and
15920    *dres may or may not be updated.  If failure, returns False and
15921    doesn't change *dres nor create any IR.
15922
15923    Note that all NEON instructions (in ARM mode) up to and including
15924    ARMv7, but not later, are handled through here, since they are all
15925    in NV space.
15926 */
15927 static Bool decode_NV_instruction_ARMv7_and_below
15928                                  ( /*MOD*/DisResult* dres,
15929                                     const VexArchInfo* archinfo,
15930                                     UInt insn )
15931 {
15932 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
15933 #  define INSN_COND          SLICE_UInt(insn, 31, 28)
15934
15935    HChar dis_buf[128];
15936
15937    // Should only be called for NV instructions
15938    vassert(BITS4(1,1,1,1) == INSN_COND);
15939
15940    /* ------------------------ pld{w} ------------------------ */
15941    if (BITS8(0,1,0,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
15942        && BITS4(1,1,1,1) == INSN(15,12)) {
15943       UInt rN    = INSN(19,16);
15944       UInt imm12 = INSN(11,0);
15945       UInt bU    = INSN(23,23);
15946       UInt bR    = INSN(22,22);
15947       DIP("pld%c [r%u, #%c%u]\n", bR ? ' ' : 'w', rN, bU ? '+' : '-', imm12);
15948       return True;
15949    }
15950
15951    if (BITS8(0,1,1,1, 0,0, 0,1) == (INSN(27,20) & BITS8(1,1,1,1, 0,0, 1,1))
15952        && BITS4(1,1,1,1) == INSN(15,12)
15953        && 0 == INSN(4,4)) {
15954       UInt rN   = INSN(19,16);
15955       UInt rM   = INSN(3,0);
15956       UInt imm5 = INSN(11,7);
15957       UInt sh2  = INSN(6,5);
15958       UInt bU   = INSN(23,23);
15959       UInt bR   = INSN(22,22);
15960       if (rM != 15 && (rN != 15 || bR)) {
15961          IRExpr* eaE = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
15962                                                        sh2, imm5, dis_buf);
15963          IRTemp eaT = newTemp(Ity_I32);
15964          /* Bind eaE to a temp merely for debugging-vex purposes, so we
15965             can check it's a plausible decoding.  It will get removed
15966             by iropt a little later on. */
15967          vassert(eaE);
15968          assign(eaT, eaE);
15969          DIP("pld%c %s\n", bR ? ' ' : 'w', dis_buf);
15970          return True;
15971       }
15972       /* fall through */
15973    }
15974
15975    /* ------------------------ pli ------------------------ */
15976    if (BITS8(0,1,0,0, 0, 1,0,1) == (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1))
15977        && BITS4(1,1,1,1) == INSN(15,12)) {
15978       UInt rN    = INSN(19,16);
15979       UInt imm12 = INSN(11,0);
15980       UInt bU    = INSN(23,23);
15981       DIP("pli [r%u, #%c%u]\n", rN, bU ? '+' : '-', imm12);
15982       return True;
15983    }
15984
15985    /* --------------------- Interworking branches --------------------- */
15986
15987    // BLX (1), viz, unconditional branch and link to R15+simm24
15988    // and set CPSR.T = 1, that is, switch to Thumb mode
15989    if (INSN(31,25) == BITS7(1,1,1,1,1,0,1)) {
15990       UInt bitH   = INSN(24,24);
15991       UInt uimm24 = INSN(23,0);   uimm24 <<= 8;
15992       Int  simm24 = (Int)uimm24;  simm24 >>= 8;
15993       simm24 = (((UInt)simm24) << 2) + (bitH << 1);
15994       /* Now this is a bit tricky.  Since we're decoding an ARM insn,
15995          it is implies that CPSR.T == 0.  Hence the current insn's
15996          address is guaranteed to be of the form X--(30)--X00.  So, no
15997          need to mask any bits off it.  But need to set the lowest bit
15998          to 1 to denote we're in Thumb mode after this, since
15999          guest_R15T has CPSR.T as the lowest bit.  And we can't chase
16000          into the call, so end the block at this point. */
16001       UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
16002       putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
16003                     IRTemp_INVALID/*because AL*/, Ijk_Boring );
16004       llPutIReg(15, mkU32(dst));
16005       dres->jk_StopHere = Ijk_Call;
16006       dres->whatNext    = Dis_StopHere;
16007       DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
16008       return True;
16009    }
16010
16011    /* ------------------- v7 barrier insns ------------------- */
16012    switch (insn) {
16013       case 0xF57FF06F: /* ISB */
16014          stmt( IRStmt_MBE(Imbe_Fence) );
16015          DIP("ISB\n");
16016          return True;
16017       case 0xF57FF04F: /* DSB sy */
16018       case 0xF57FF04E: /* DSB st */
16019       case 0xF57FF04B: /* DSB ish */
16020       case 0xF57FF04A: /* DSB ishst */
16021       case 0xF57FF047: /* DSB nsh */
16022       case 0xF57FF046: /* DSB nshst */
16023       case 0xF57FF043: /* DSB osh */
16024       case 0xF57FF042: /* DSB oshst */
16025          stmt( IRStmt_MBE(Imbe_Fence) );
16026          DIP("DSB\n");
16027          return True;
16028       case 0xF57FF05F: /* DMB sy */
16029       case 0xF57FF05E: /* DMB st */
16030       case 0xF57FF05B: /* DMB ish */
16031       case 0xF57FF05A: /* DMB ishst */
16032       case 0xF57FF057: /* DMB nsh */
16033       case 0xF57FF056: /* DMB nshst */
16034       case 0xF57FF053: /* DMB osh */
16035       case 0xF57FF052: /* DMB oshst */
16036          stmt( IRStmt_MBE(Imbe_Fence) );
16037          DIP("DMB\n");
16038          return True;
16039       default:
16040          break;
16041    }
16042
16043    /* ------------------- CLREX ------------------ */
16044    if (insn == 0xF57FF01F) {
16045       /* AFAICS, this simply cancels a (all?) reservations made by a
16046          (any?) preceding LDREX(es).  Arrange to hand it through to
16047          the back end. */
16048       stmt( IRStmt_MBE(Imbe_CancelReservation) );
16049       DIP("clrex\n");
16050       return True;
16051    }
16052
16053    /* ------------------- NEON ------------------- */
16054    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
16055       Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
16056                         dres, insn, IRTemp_INVALID/*unconditional*/,
16057                         False/*!isT*/
16058                      );
16059       if (ok_neon)
16060          return True;
16061    }
16062
16063    // unrecognised
16064    return False;
16065
16066 #  undef INSN_COND
16067 #  undef INSN
16068 }
16069
16070
16071 /*------------------------------------------------------------*/
16072 /*--- Disassemble a single ARM instruction                 ---*/
16073 /*------------------------------------------------------------*/
16074
16075 /* Disassemble a single ARM instruction into IR.  The instruction is
16076    located in host memory at guest_instr, and has (decoded) guest IP
16077    of guest_R15_curr_instr_notENC, which will have been set before the
16078    call here. */
16079
16080 static
16081 DisResult disInstr_ARM_WRK (
16082              Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
16083              Bool         resteerCisOk,
16084              void*        callback_opaque,
16085              const UChar* guest_instr,
16086              const VexArchInfo* archinfo,
16087              const VexAbiInfo*  abiinfo,
16088              Bool         sigill_diag
16089           )
16090 {
16091    // A macro to fish bits out of 'insn'.
16092 #  define INSN(_bMax,_bMin)  SLICE_UInt(insn, (_bMax), (_bMin))
16093 #  define INSN_COND          SLICE_UInt(insn, 31, 28)
16094
16095    DisResult dres;
16096    UInt      insn;
16097    IRTemp    condT; /* :: Ity_I32 */
16098    UInt      summary;
16099    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
16100
16101    /* Set result defaults. */
16102    dres.whatNext    = Dis_Continue;
16103    dres.len         = 4;
16104    dres.continueAt  = 0;
16105    dres.jk_StopHere = Ijk_INVALID;
16106    dres.hint        = Dis_HintNone;
16107
16108    /* Set default actions for post-insn handling of writes to r15, if
16109       required. */
16110    r15written = False;
16111    r15guard   = IRTemp_INVALID; /* unconditional */
16112    r15kind    = Ijk_Boring;
16113
16114    /* At least this is simple on ARM: insns are all 4 bytes long, and
16115       4-aligned.  So just fish the whole thing out of memory right now
16116       and have done. */
16117    insn = getUIntLittleEndianly( guest_instr );
16118
16119    if (0) vex_printf("insn: 0x%x\n", insn);
16120
16121    DIP("\t(arm) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
16122
16123    vassert(0 == (guest_R15_curr_instr_notENC & 3));
16124
16125    /* ----------------------------------------------------------- */
16126
16127    /* Spot "Special" instructions (see comment at top of file). */
16128    {
16129       const UChar* code = guest_instr;
16130       /* Spot the 16-byte preamble:
16131
16132          e1a0c1ec  mov r12, r12, ROR #3
16133          e1a0c6ec  mov r12, r12, ROR #13
16134          e1a0ceec  mov r12, r12, ROR #29
16135          e1a0c9ec  mov r12, r12, ROR #19
16136       */
16137       UInt word1 = 0xE1A0C1EC;
16138       UInt word2 = 0xE1A0C6EC;
16139       UInt word3 = 0xE1A0CEEC;
16140       UInt word4 = 0xE1A0C9EC;
16141       if (getUIntLittleEndianly(code+ 0) == word1 &&
16142           getUIntLittleEndianly(code+ 4) == word2 &&
16143           getUIntLittleEndianly(code+ 8) == word3 &&
16144           getUIntLittleEndianly(code+12) == word4) {
16145          /* Got a "Special" instruction preamble.  Which one is it? */
16146          if (getUIntLittleEndianly(code+16) == 0xE18AA00A
16147                                                /* orr r10,r10,r10 */) {
16148             /* R3 = client_request ( R4 ) */
16149             DIP("r3 = client_request ( %%r4 )\n");
16150             llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
16151             dres.jk_StopHere = Ijk_ClientReq;
16152             dres.whatNext    = Dis_StopHere;
16153             goto decode_success;
16154          }
16155          else
16156          if (getUIntLittleEndianly(code+16) == 0xE18BB00B
16157                                                /* orr r11,r11,r11 */) {
16158             /* R3 = guest_NRADDR */
16159             DIP("r3 = guest_NRADDR\n");
16160             dres.len = 20;
16161             llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
16162             goto decode_success;
16163          }
16164          else
16165          if (getUIntLittleEndianly(code+16) == 0xE18CC00C
16166                                                /* orr r12,r12,r12 */) {
16167             /*  branch-and-link-to-noredir R4 */
16168             DIP("branch-and-link-to-noredir r4\n");
16169             llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
16170             llPutIReg(15, llGetIReg(4));
16171             dres.jk_StopHere = Ijk_NoRedir;
16172             dres.whatNext    = Dis_StopHere;
16173             goto decode_success;
16174          }
16175          else
16176          if (getUIntLittleEndianly(code+16) == 0xE1899009
16177                                                /* orr r9,r9,r9 */) {
16178             /* IR injection */
16179             DIP("IR injection\n");
16180             vex_inject_ir(irsb, Iend_LE);
16181             // Invalidate the current insn. The reason is that the IRop we're
16182             // injecting here can change. In which case the translation has to
16183             // be redone. For ease of handling, we simply invalidate all the
16184             // time.
16185             stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
16186             stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
16187             llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
16188             dres.whatNext    = Dis_StopHere;
16189             dres.jk_StopHere = Ijk_InvalICache;
16190             goto decode_success;
16191          }
16192          /* We don't know what it is.  Set opc1/opc2 so decode_failure
16193             can print the insn following the Special-insn preamble. */
16194          insn = getUIntLittleEndianly(code+16);
16195          goto decode_failure;
16196          /*NOTREACHED*/
16197       }
16198
16199    }
16200
16201    /* ----------------------------------------------------------- */
16202
16203    /* Main ARM instruction decoder starts here. */
16204
16205    /* Deal with the condition.  Strategy is to merely generate a
16206       condition temporary at this point (or IRTemp_INVALID, meaning
16207       unconditional).  We leave it to lower-level instruction decoders
16208       to decide whether they can generate straight-line code, or
16209       whether they must generate a side exit before the instruction.
16210       condT :: Ity_I32 and is always either zero or one. */
16211    condT = IRTemp_INVALID;
16212    switch ( (ARMCondcode)INSN_COND ) {
16213       case ARMCondNV: {
16214          // Illegal instruction prior to v5 (see ARM ARM A3-5), but
16215          // some cases are acceptable
16216          Bool ok
16217             = decode_NV_instruction_ARMv7_and_below(&dres, archinfo, insn);
16218          if (ok)
16219             goto decode_success;
16220          else
16221             goto after_v7_decoder;
16222       }
16223       case ARMCondAL: // Always executed
16224          break;
16225       case ARMCondEQ: case ARMCondNE: case ARMCondHS: case ARMCondLO:
16226       case ARMCondMI: case ARMCondPL: case ARMCondVS: case ARMCondVC:
16227       case ARMCondHI: case ARMCondLS: case ARMCondGE: case ARMCondLT:
16228       case ARMCondGT: case ARMCondLE:
16229          condT = newTemp(Ity_I32);
16230          assign( condT, mk_armg_calculate_condition( INSN_COND ));
16231          break;
16232    }
16233
16234    /* ----------------------------------------------------------- */
16235    /* -- ARMv5 integer instructions                            -- */
16236    /* ----------------------------------------------------------- */
16237
16238    /* ---------------- Data processing ops ------------------- */
16239
16240    if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0))
16241        && !(INSN(25,25) == 0 && INSN(7,7) == 1 && INSN(4,4) == 1)) {
16242       IRTemp  shop = IRTemp_INVALID; /* shifter operand */
16243       IRTemp  shco = IRTemp_INVALID; /* shifter carry out */
16244       UInt    rD   = (insn >> 12) & 0xF; /* 15:12 */
16245       UInt    rN   = (insn >> 16) & 0xF; /* 19:16 */
16246       UInt    bitS = (insn >> 20) & 1; /* 20:20 */
16247       IRTemp  rNt  = IRTemp_INVALID;
16248       IRTemp  res  = IRTemp_INVALID;
16249       IRTemp  oldV = IRTemp_INVALID;
16250       IRTemp  oldC = IRTemp_INVALID;
16251       const HChar*  name = NULL;
16252       IROp    op   = Iop_INVALID;
16253       Bool    ok;
16254
16255       switch (INSN(24,21)) {
16256
16257          /* --------- ADD, SUB, AND, OR --------- */
16258          case BITS4(0,1,0,0): /* ADD:  Rd = Rn + shifter_operand */
16259             name = "add"; op = Iop_Add32; goto rd_eq_rn_op_SO;
16260          case BITS4(0,0,1,0): /* SUB:  Rd = Rn - shifter_operand */
16261             name = "sub"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
16262          case BITS4(0,0,1,1): /* RSB:  Rd = shifter_operand - Rn */
16263             name = "rsb"; op = Iop_Sub32; goto rd_eq_rn_op_SO;
16264          case BITS4(0,0,0,0): /* AND:  Rd = Rn & shifter_operand */
16265             name = "and"; op = Iop_And32; goto rd_eq_rn_op_SO;
16266          case BITS4(1,1,0,0): /* OR:   Rd = Rn | shifter_operand */
16267             name = "orr"; op = Iop_Or32; goto rd_eq_rn_op_SO;
16268          case BITS4(0,0,0,1): /* EOR:  Rd = Rn ^ shifter_operand */
16269             name = "eor"; op = Iop_Xor32; goto rd_eq_rn_op_SO;
16270          case BITS4(1,1,1,0): /* BIC:  Rd = Rn & ~shifter_operand */
16271             name = "bic"; op = Iop_And32; goto rd_eq_rn_op_SO;
16272          rd_eq_rn_op_SO: {
16273             Bool isRSB = False;
16274             Bool isBIC = False;
16275             switch (INSN(24,21)) {
16276                case BITS4(0,0,1,1):
16277                   vassert(op == Iop_Sub32); isRSB = True; break;
16278                case BITS4(1,1,1,0):
16279                   vassert(op == Iop_And32); isBIC = True; break;
16280                default:
16281                   break;
16282             }
16283             rNt = newTemp(Ity_I32);
16284             assign(rNt, getIRegA(rN));
16285             ok = mk_shifter_operand(
16286                     INSN(25,25), INSN(11,0),
16287                     &shop, bitS ? &shco : NULL, dis_buf
16288                  );
16289             if (!ok)
16290                break;
16291             res = newTemp(Ity_I32);
16292             // compute the main result
16293             if (isRSB) {
16294                // reverse-subtract: shifter_operand - Rn
16295                vassert(op == Iop_Sub32);
16296                assign(res, binop(op, mkexpr(shop), mkexpr(rNt)) );
16297             } else if (isBIC) {
16298                // andn: shifter_operand & ~Rn
16299                vassert(op == Iop_And32);
16300                assign(res, binop(op, mkexpr(rNt),
16301                                      unop(Iop_Not32, mkexpr(shop))) );
16302             } else {
16303                // normal: Rn op shifter_operand
16304                assign(res, binop(op, mkexpr(rNt), mkexpr(shop)) );
16305             }
16306             // but don't commit it until after we've finished
16307             // all necessary reads from the guest state
16308             if (bitS
16309                 && (op == Iop_And32 || op == Iop_Or32 || op == Iop_Xor32)) {
16310                oldV = newTemp(Ity_I32);
16311                assign( oldV, mk_armg_calculate_flag_v() );
16312             }
16313             // can't safely read guest state after here
16314             // now safe to put the main result
16315             putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
16316             // XXXX!! not safe to read any guest state after
16317             // this point (I think the code below doesn't do that).
16318             if (!bitS)
16319                vassert(shco == IRTemp_INVALID);
16320             /* Update the flags thunk if necessary */
16321             if (bitS) {
16322                vassert(shco != IRTemp_INVALID);
16323                switch (op) {
16324                   case Iop_Add32:
16325                      setFlags_D1_D2( ARMG_CC_OP_ADD, rNt, shop, condT );
16326                      break;
16327                   case Iop_Sub32:
16328                      if (isRSB) {
16329                         setFlags_D1_D2( ARMG_CC_OP_SUB, shop, rNt, condT );
16330                      } else {
16331                         setFlags_D1_D2( ARMG_CC_OP_SUB, rNt, shop, condT );
16332                      }
16333                      break;
16334                   case Iop_And32: /* BIC and AND set the flags the same */
16335                   case Iop_Or32:
16336                   case Iop_Xor32:
16337                      // oldV has been read just above
16338                      setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16339                                         res, shco, oldV, condT );
16340                      break;
16341                   default:
16342                      vassert(0);
16343                }
16344             }
16345             DIP("%s%s%s r%u, r%u, %s\n",
16346                 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
16347             goto decode_success;
16348          }
16349
16350          /* --------- MOV, MVN --------- */
16351          case BITS4(1,1,0,1):   /* MOV: Rd = shifter_operand */
16352          case BITS4(1,1,1,1): { /* MVN: Rd = not(shifter_operand) */
16353             Bool isMVN = INSN(24,21) == BITS4(1,1,1,1);
16354             IRTemp jk = Ijk_Boring;
16355             if (rN != 0)
16356                break; /* rN must be zero */
16357             ok = mk_shifter_operand(
16358                     INSN(25,25), INSN(11,0),
16359                     &shop, bitS ? &shco : NULL, dis_buf
16360                  );
16361             if (!ok)
16362                break;
16363             res = newTemp(Ity_I32);
16364             assign( res, isMVN ? unop(Iop_Not32, mkexpr(shop))
16365                                : mkexpr(shop) );
16366             if (bitS) {
16367                vassert(shco != IRTemp_INVALID);
16368                oldV = newTemp(Ity_I32);
16369                assign( oldV, mk_armg_calculate_flag_v() );
16370             } else {
16371                vassert(shco == IRTemp_INVALID);
16372             }
16373             /* According to the Cortex A8 TRM Sec. 5.2.1, MOV PC, r14 is a
16374                 return for purposes of branch prediction. */
16375             if (!isMVN && INSN(11,0) == 14) {
16376               jk = Ijk_Ret;
16377             }
16378             // can't safely read guest state after here
16379             putIRegA( rD, mkexpr(res), condT, jk );
16380             /* Update the flags thunk if necessary */
16381             if (bitS) {
16382                setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16383                                   res, shco, oldV, condT );
16384             }
16385             DIP("%s%s%s r%u, %s\n",
16386                 isMVN ? "mvn" : "mov",
16387                 nCC(INSN_COND), bitS ? "s" : "", rD, dis_buf );
16388             goto decode_success;
16389          }
16390
16391          /* --------- CMP --------- */
16392          case BITS4(1,0,1,0):   /* CMP:  (void) Rn - shifter_operand */
16393          case BITS4(1,0,1,1): { /* CMN:  (void) Rn + shifter_operand */
16394             Bool isCMN = INSN(24,21) == BITS4(1,0,1,1);
16395             if (rD != 0)
16396                break; /* rD must be zero */
16397             if (bitS == 0)
16398                break; /* if S (bit 20) is not set, it's not CMP/CMN */
16399             rNt = newTemp(Ity_I32);
16400             assign(rNt, getIRegA(rN));
16401             ok = mk_shifter_operand(
16402                     INSN(25,25), INSN(11,0),
16403                     &shop, NULL, dis_buf
16404                  );
16405             if (!ok)
16406                break;
16407             // can't safely read guest state after here
16408             /* Update the flags thunk. */
16409             setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
16410                             rNt, shop, condT );
16411             DIP("%s%s r%u, %s\n",
16412                 isCMN ? "cmn" : "cmp",
16413                 nCC(INSN_COND), rN, dis_buf );
16414             goto decode_success;
16415          }
16416
16417          /* --------- TST --------- */
16418          case BITS4(1,0,0,0):   /* TST:  (void) Rn & shifter_operand */
16419          case BITS4(1,0,0,1): { /* TEQ:  (void) Rn ^ shifter_operand */
16420             Bool isTEQ = INSN(24,21) == BITS4(1,0,0,1);
16421             if (rD != 0)
16422                break; /* rD must be zero */
16423             if (bitS == 0)
16424                break; /* if S (bit 20) is not set, it's not TST/TEQ */
16425             rNt = newTemp(Ity_I32);
16426             assign(rNt, getIRegA(rN));
16427             ok = mk_shifter_operand(
16428                     INSN(25,25), INSN(11,0),
16429                     &shop, &shco, dis_buf
16430                  );
16431             if (!ok)
16432                break;
16433             /* Update the flags thunk. */
16434             res = newTemp(Ity_I32);
16435             assign( res, binop(isTEQ ? Iop_Xor32 : Iop_And32,
16436                                mkexpr(rNt), mkexpr(shop)) );
16437             oldV = newTemp(Ity_I32);
16438             assign( oldV, mk_armg_calculate_flag_v() );
16439             // can't safely read guest state after here
16440             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC,
16441                                res, shco, oldV, condT );
16442             DIP("%s%s r%u, %s\n",
16443                 isTEQ ? "teq" : "tst",
16444                 nCC(INSN_COND), rN, dis_buf );
16445             goto decode_success;
16446          }
16447
16448          /* --------- ADC, SBC, RSC --------- */
16449          case BITS4(0,1,0,1): /* ADC:  Rd = Rn + shifter_operand + oldC */
16450             name = "adc"; goto rd_eq_rn_op_SO_op_oldC;
16451          case BITS4(0,1,1,0): /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
16452             name = "sbc"; goto rd_eq_rn_op_SO_op_oldC;
16453          case BITS4(0,1,1,1): /* RSC:  Rd = shifter_operand - Rn - (oldC ^ 1) */
16454             name = "rsc"; goto rd_eq_rn_op_SO_op_oldC;
16455          rd_eq_rn_op_SO_op_oldC: {
16456             // FIXME: shco isn't used for anything.  Get rid of it.
16457             rNt = newTemp(Ity_I32);
16458             assign(rNt, getIRegA(rN));
16459             ok = mk_shifter_operand(
16460                     INSN(25,25), INSN(11,0),
16461                     &shop, bitS ? &shco : NULL, dis_buf
16462                  );
16463             if (!ok)
16464                break;
16465             oldC = newTemp(Ity_I32);
16466             assign( oldC, mk_armg_calculate_flag_c() );
16467             res = newTemp(Ity_I32);
16468             // compute the main result
16469             switch (INSN(24,21)) {
16470                case BITS4(0,1,0,1): /* ADC */
16471                   assign(res,
16472                          binop(Iop_Add32,
16473                                binop(Iop_Add32, mkexpr(rNt), mkexpr(shop)),
16474                                mkexpr(oldC) ));
16475                   break;
16476                case BITS4(0,1,1,0): /* SBC */
16477                   assign(res,
16478                          binop(Iop_Sub32,
16479                                binop(Iop_Sub32, mkexpr(rNt), mkexpr(shop)),
16480                                binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16481                   break;
16482                case BITS4(0,1,1,1): /* RSC */
16483                   assign(res,
16484                          binop(Iop_Sub32,
16485                                binop(Iop_Sub32, mkexpr(shop), mkexpr(rNt)),
16486                                binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
16487                   break;
16488                default:
16489                   vassert(0);
16490             }
16491             // but don't commit it until after we've finished
16492             // all necessary reads from the guest state
16493             // now safe to put the main result
16494             putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
16495             // XXXX!! not safe to read any guest state after
16496             // this point (I think the code below doesn't do that).
16497             if (!bitS)
16498                vassert(shco == IRTemp_INVALID);
16499             /* Update the flags thunk if necessary */
16500             if (bitS) {
16501                vassert(shco != IRTemp_INVALID);
16502                switch (INSN(24,21)) {
16503                   case BITS4(0,1,0,1): /* ADC */
16504                      setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
16505                                         rNt, shop, oldC, condT );
16506                      break;
16507                   case BITS4(0,1,1,0): /* SBC */
16508                      setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16509                                         rNt, shop, oldC, condT );
16510                      break;
16511                   case BITS4(0,1,1,1): /* RSC */
16512                      setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
16513                                         shop, rNt, oldC, condT );
16514                      break;
16515                   default:
16516                      vassert(0);
16517                }
16518             }
16519             DIP("%s%s%s r%u, r%u, %s\n",
16520                 name, nCC(INSN_COND), bitS ? "s" : "", rD, rN, dis_buf );
16521             goto decode_success;
16522          }
16523
16524          default:
16525             vassert(0);
16526       }
16527    } /* if (0 == (INSN(27,20) & BITS8(1,1,0,0,0,0,0,0)) */
16528
16529    /* --------------------- Load/store (ubyte & word) -------- */
16530    // LDR STR LDRB STRB
16531    /*                 31   27   23   19 15 11    6   4 3  # highest bit
16532                         28   24   20 16 12
16533       A5-20   1 | 16  cond 0101 UB0L Rn Rd imm12
16534       A5-22   1 | 32  cond 0111 UBOL Rn Rd imm5  sh2 0 Rm
16535       A5-24   2 | 16  cond 0101 UB1L Rn Rd imm12
16536       A5-26   2 | 32  cond 0111 UB1L Rn Rd imm5  sh2 0 Rm
16537       A5-28   3 | 16  cond 0100 UB0L Rn Rd imm12
16538       A5-32   3 | 32  cond 0110 UB0L Rn Rd imm5  sh2 0 Rm
16539    */
16540    /* case coding:
16541              1   at-ea               (access at ea)
16542              2   at-ea-then-upd      (access at ea, then Rn = ea)
16543              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
16544       ea coding
16545              16  Rn +/- imm12
16546              32  Rn +/- Rm sh2 imm5
16547    */
16548    /* Quickly skip over all of this for hopefully most instructions */
16549    if ((INSN(27,24) & BITS4(1,1,0,0)) != BITS4(0,1,0,0))
16550       goto after_load_store_ubyte_or_word;
16551
16552    summary = 0;
16553
16554    /**/ if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 0) {
16555       summary = 1 | 16;
16556    }
16557    else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 0
16558                                           && INSN(4,4) == 0) {
16559       summary = 1 | 32;
16560    }
16561    else if (INSN(27,24) == BITS4(0,1,0,1) && INSN(21,21) == 1) {
16562       summary = 2 | 16;
16563    }
16564    else if (INSN(27,24) == BITS4(0,1,1,1) && INSN(21,21) == 1
16565                                           && INSN(4,4) == 0) {
16566       summary = 2 | 32;
16567    }
16568    else if (INSN(27,24) == BITS4(0,1,0,0) && INSN(21,21) == 0) {
16569       summary = 3 | 16;
16570    }
16571    else if (INSN(27,24) == BITS4(0,1,1,0) && INSN(21,21) == 0
16572                                           && INSN(4,4) == 0) {
16573       summary = 3 | 32;
16574    }
16575    else goto after_load_store_ubyte_or_word;
16576
16577    { UInt rN = (insn >> 16) & 0xF; /* 19:16 */
16578      UInt rD = (insn >> 12) & 0xF; /* 15:12 */
16579      UInt rM = (insn >> 0)  & 0xF; /*  3:0  */
16580      UInt bU = (insn >> 23) & 1;      /* 23 */
16581      UInt bB = (insn >> 22) & 1;      /* 22 */
16582      UInt bL = (insn >> 20) & 1;      /* 20 */
16583      UInt imm12 = (insn >> 0) & 0xFFF; /* 11:0 */
16584      UInt imm5  = (insn >> 7) & 0x1F;  /* 11:7 */
16585      UInt sh2   = (insn >> 5) & 3;     /* 6:5 */
16586
16587      /* Skip some invalid cases, which would lead to two competing
16588         updates to the same register, or which are otherwise
16589         disallowed by the spec. */
16590      switch (summary) {
16591         case 1 | 16:
16592            break;
16593         case 1 | 32:
16594            if (rM == 15) goto after_load_store_ubyte_or_word;
16595            break;
16596         case 2 | 16: case 3 | 16:
16597            if (rN == 15) goto after_load_store_ubyte_or_word;
16598            if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
16599            break;
16600         case 2 | 32: case 3 | 32:
16601            if (rM == 15) goto after_load_store_ubyte_or_word;
16602            if (rN == 15) goto after_load_store_ubyte_or_word;
16603            if (rN == rM) goto after_load_store_ubyte_or_word;
16604            if (bL == 1 && rN == rD) goto after_load_store_ubyte_or_word;
16605            break;
16606         default:
16607            vassert(0);
16608      }
16609
16610      /* compute the effective address.  Bind it to a tmp since we
16611         may need to use it twice. */
16612      IRExpr* eaE = NULL;
16613      switch (summary & 0xF0) {
16614         case 16:
16615            eaE = mk_EA_reg_plusminus_imm12( rN, bU, imm12, dis_buf );
16616            break;
16617         case 32:
16618            eaE = mk_EA_reg_plusminus_shifted_reg( rN, bU, rM, sh2, imm5,
16619                                                   dis_buf );
16620            break;
16621      }
16622      vassert(eaE);
16623      IRTemp eaT = newTemp(Ity_I32);
16624      assign(eaT, eaE);
16625
16626      /* get the old Rn value */
16627      IRTemp rnT = newTemp(Ity_I32);
16628      assign(rnT, getIRegA(rN));
16629
16630      /* decide on the transfer address */
16631      IRTemp taT = IRTemp_INVALID;
16632      switch (summary & 0x0F) {
16633         case 1: case 2: taT = eaT; break;
16634         case 3:         taT = rnT; break;
16635      }
16636      vassert(taT != IRTemp_INVALID);
16637
16638      if (bL == 0) {
16639        /* Store.  If necessary, update the base register before the
16640           store itself, so that the common idiom of "str rX, [sp,
16641           #-4]!" (store rX at sp-4, then do new sp = sp-4, a.k.a "push
16642           rX") doesn't cause Memcheck to complain that the access is
16643           below the stack pointer.  Also, not updating sp before the
16644           store confuses Valgrind's dynamic stack-extending logic.  So
16645           do it before the store.  Hence we need to snarf the store
16646           data before doing the basereg update. */
16647
16648         /* get hold of the data to be stored */
16649         IRTemp rDt = newTemp(Ity_I32);
16650         assign(rDt, getIRegA(rD));
16651
16652         /* Update Rn if necessary. */
16653         switch (summary & 0x0F) {
16654            case 2: case 3:
16655               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16656               break;
16657         }
16658
16659         /* generate the transfer */
16660         if (bB == 0) { // word store
16661            storeGuardedLE( mkexpr(taT), mkexpr(rDt), condT );
16662         } else { // byte store
16663            vassert(bB == 1);
16664            storeGuardedLE( mkexpr(taT), unop(Iop_32to8, mkexpr(rDt)), condT );
16665         }
16666
16667      } else {
16668         /* Load */
16669         vassert(bL == 1);
16670
16671         /* generate the transfer */
16672         if (bB == 0) { // word load
16673            IRTemp jk = Ijk_Boring;
16674            /* According to the Cortex A8 TRM Sec. 5.2.1, LDR(1) with r13 as the
16675                base register and PC as the destination register is a return for
16676                purposes of branch prediction.
16677               The ARM ARM Sec. C9.10.1 further specifies that it must use a
16678                post-increment by immediate addressing mode to be counted in
16679                event 0x0E (Procedure return).*/
16680            if (rN == 13 && summary == (3 | 16) && bB == 0) {
16681               jk = Ijk_Ret;
16682            }
16683            IRTemp tD = newTemp(Ity_I32);
16684            loadGuardedLE( tD, ILGop_Ident32,
16685                           mkexpr(taT), llGetIReg(rD), condT );
16686            /* "rD == 15 ? condT : IRTemp_INVALID": simply
16687               IRTemp_INVALID would be correct in all cases here, and
16688               for the non-r15 case it generates better code, by
16689               avoiding two tests of the cond (since it is already
16690               tested by loadGuardedLE).  However, the logic at the end
16691               of this function, that deals with writes to r15, has an
16692               optimisation which depends on seeing whether or not the
16693               write is conditional.  Hence in this particular case we
16694               let it "see" the guard condition. */
16695            putIRegA( rD, mkexpr(tD),
16696                      rD == 15 ? condT : IRTemp_INVALID, jk );
16697         } else { // byte load
16698            vassert(bB == 1);
16699            IRTemp tD = newTemp(Ity_I32);
16700            loadGuardedLE( tD, ILGop_8Uto32, mkexpr(taT), llGetIReg(rD), condT );
16701            /* No point in similar 3rd arg complexity here, since we
16702               can't sanely write anything to r15 like this. */
16703            putIRegA( rD, mkexpr(tD), IRTemp_INVALID, Ijk_Boring );
16704         }
16705
16706         /* Update Rn if necessary. */
16707         switch (summary & 0x0F) {
16708            case 2: case 3:
16709               // should be assured by logic above:
16710               if (bL == 1)
16711                  vassert(rD != rN); /* since we just wrote rD */
16712               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16713               break;
16714         }
16715      }
16716
16717      switch (summary & 0x0F) {
16718         case 1:  DIP("%sr%s%s r%u, %s\n",
16719                      bL == 0 ? "st" : "ld",
16720                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16721                  break;
16722         case 2:  DIP("%sr%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16723                      bL == 0 ? "st" : "ld",
16724                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16725                  break;
16726         case 3:  DIP("%sr%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16727                      bL == 0 ? "st" : "ld",
16728                      bB == 0 ? "" : "b", nCC(INSN_COND), rD, dis_buf);
16729                  break;
16730         default: vassert(0);
16731      }
16732
16733      /* XXX deal with alignment constraints */
16734
16735      goto decode_success;
16736
16737      /* Complications:
16738
16739         For all loads: if the Amode specifies base register
16740         writeback, and the same register is specified for Rd and Rn,
16741         the results are UNPREDICTABLE.
16742
16743         For all loads and stores: if R15 is written, branch to
16744         that address afterwards.
16745
16746         STRB: straightforward
16747         LDRB: loaded data is zero extended
16748         STR:  lowest 2 bits of address are ignored
16749         LDR:  if the lowest 2 bits of the address are nonzero
16750               then the loaded value is rotated right by 8 * the lowest 2 bits
16751      */
16752    }
16753
16754   after_load_store_ubyte_or_word:
16755
16756    /* --------------------- Load/store (sbyte & hword) -------- */
16757    // LDRH LDRSH STRH LDRSB
16758    /*                 31   27   23   19 15 11   7    3     # highest bit
16759                         28   24   20 16 12    8    4    0
16760       A5-36   1 | 16  cond 0001 U10L Rn Rd im4h 1SH1 im4l
16761       A5-38   1 | 32  cond 0001 U00L Rn Rd 0000 1SH1 Rm
16762       A5-40   2 | 16  cond 0001 U11L Rn Rd im4h 1SH1 im4l
16763       A5-42   2 | 32  cond 0001 U01L Rn Rd 0000 1SH1 Rm
16764       A5-44   3 | 16  cond 0000 U10L Rn Rd im4h 1SH1 im4l
16765       A5-46   3 | 32  cond 0000 U00L Rn Rd 0000 1SH1 Rm
16766    */
16767    /* case coding:
16768              1   at-ea               (access at ea)
16769              2   at-ea-then-upd      (access at ea, then Rn = ea)
16770              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
16771       ea coding
16772              16  Rn +/- imm8
16773              32  Rn +/- Rm
16774    */
16775    /* Quickly skip over all of this for hopefully most instructions */
16776    if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
16777       goto after_load_store_sbyte_or_hword;
16778
16779    /* Check the "1SH1" thing. */
16780    if ((INSN(7,4) & BITS4(1,0,0,1)) != BITS4(1,0,0,1))
16781       goto after_load_store_sbyte_or_hword;
16782
16783    summary = 0;
16784
16785    /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,0)) {
16786       summary = 1 | 16;
16787    }
16788    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,0)) {
16789       summary = 1 | 32;
16790    }
16791    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(1,1)) {
16792       summary = 2 | 16;
16793    }
16794    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,21) == BITS2(0,1)) {
16795       summary = 2 | 32;
16796    }
16797    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(1,0)) {
16798       summary = 3 | 16;
16799    }
16800    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,21) == BITS2(0,0)) {
16801       summary = 3 | 32;
16802    }
16803    else goto after_load_store_sbyte_or_hword;
16804
16805    { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
16806      UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
16807      UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
16808      UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
16809      UInt bL   = (insn >> 20) & 1;   /* 20 L=1 load, L=0 store */
16810      UInt bH   = (insn >> 5) & 1;    /* H=1 halfword, H=0 byte */
16811      UInt bS   = (insn >> 6) & 1;    /* S=1 signed, S=0 unsigned */
16812      UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
16813
16814      /* Skip combinations that are either meaningless or already
16815         handled by main word-or-unsigned-byte load-store
16816         instructions. */
16817      if (bS == 0 && bH == 0) /* "unsigned byte" */
16818         goto after_load_store_sbyte_or_hword;
16819      if (bS == 1 && bL == 0) /* "signed store" */
16820         goto after_load_store_sbyte_or_hword;
16821
16822      /* Require 11:8 == 0 for Rn +/- Rm cases */
16823      if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
16824         goto after_load_store_sbyte_or_hword;
16825
16826      /* Skip some invalid cases, which would lead to two competing
16827         updates to the same register, or which are otherwise
16828         disallowed by the spec. */
16829      switch (summary) {
16830         case 1 | 16:
16831            break;
16832         case 1 | 32:
16833            if (rM == 15) goto after_load_store_sbyte_or_hword;
16834            break;
16835         case 2 | 16: case 3 | 16:
16836            if (rN == 15) goto after_load_store_sbyte_or_hword;
16837            if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
16838            break;
16839         case 2 | 32: case 3 | 32:
16840            if (rM == 15) goto after_load_store_sbyte_or_hword;
16841            if (rN == 15) goto after_load_store_sbyte_or_hword;
16842            if (rN == rM) goto after_load_store_sbyte_or_hword;
16843            if (bL == 1 && rN == rD) goto after_load_store_sbyte_or_hword;
16844            break;
16845         default:
16846            vassert(0);
16847      }
16848
16849      /* If this is a branch, make it unconditional at this point.
16850         Doing conditional branches in-line is too complex (for now).
16851         Note that you'd have to be insane to use any of these loads to
16852         do a branch, since they only load 16 bits at most, but we
16853         handle it just in case. */
16854      if (bL == 1 && rD == 15 && condT != IRTemp_INVALID) {
16855         // go uncond
16856         mk_skip_over_A32_if_cond_is_false( condT );
16857         condT = IRTemp_INVALID;
16858         // now uncond
16859      }
16860
16861      /* compute the effective address.  Bind it to a tmp since we
16862         may need to use it twice. */
16863      IRExpr* eaE = NULL;
16864      switch (summary & 0xF0) {
16865         case 16:
16866            eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
16867            break;
16868         case 32:
16869            eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
16870            break;
16871      }
16872      vassert(eaE);
16873      IRTemp eaT = newTemp(Ity_I32);
16874      assign(eaT, eaE);
16875
16876      /* get the old Rn value */
16877      IRTemp rnT = newTemp(Ity_I32);
16878      assign(rnT, getIRegA(rN));
16879
16880      /* decide on the transfer address */
16881      IRTemp taT = IRTemp_INVALID;
16882      switch (summary & 0x0F) {
16883         case 1: case 2: taT = eaT; break;
16884         case 3:         taT = rnT; break;
16885      }
16886      vassert(taT != IRTemp_INVALID);
16887
16888      /* ll previous value of rD, for dealing with conditional loads */
16889      IRTemp llOldRd = newTemp(Ity_I32);
16890      assign(llOldRd, llGetIReg(rD));
16891
16892      /* halfword store  H 1  L 0  S 0
16893         uhalf load      H 1  L 1  S 0
16894         shalf load      H 1  L 1  S 1
16895         sbyte load      H 0  L 1  S 1
16896      */
16897      const HChar* name = NULL;
16898      /* generate the transfer */
16899      /**/ if (bH == 1 && bL == 0 && bS == 0) { // halfword store
16900         storeGuardedLE( mkexpr(taT),
16901                         unop(Iop_32to16, getIRegA(rD)), condT );
16902         name = "strh";
16903      }
16904      else if (bH == 1 && bL == 1 && bS == 0) { // uhalf load
16905         IRTemp newRd = newTemp(Ity_I32);
16906         loadGuardedLE( newRd, ILGop_16Uto32,
16907                        mkexpr(taT), mkexpr(llOldRd), condT );
16908         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16909         name = "ldrh";
16910      }
16911      else if (bH == 1 && bL == 1 && bS == 1) { // shalf load
16912         IRTemp newRd = newTemp(Ity_I32);
16913         loadGuardedLE( newRd, ILGop_16Sto32,
16914                        mkexpr(taT), mkexpr(llOldRd), condT );
16915         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16916         name = "ldrsh";
16917      }
16918      else if (bH == 0 && bL == 1 && bS == 1) { // sbyte load
16919         IRTemp newRd = newTemp(Ity_I32);
16920         loadGuardedLE( newRd, ILGop_8Sto32,
16921                        mkexpr(taT), mkexpr(llOldRd), condT );
16922         putIRegA( rD, mkexpr(newRd), IRTemp_INVALID, Ijk_Boring );
16923         name = "ldrsb";
16924      }
16925      else
16926         vassert(0); // should be assured by logic above
16927
16928      /* Update Rn if necessary. */
16929      switch (summary & 0x0F) {
16930         case 2: case 3:
16931            // should be assured by logic above:
16932            if (bL == 1)
16933               vassert(rD != rN); /* since we just wrote rD */
16934            putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
16935            break;
16936      }
16937
16938      switch (summary & 0x0F) {
16939         case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
16940                  break;
16941         case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
16942                      name, nCC(INSN_COND), rD, dis_buf);
16943                  break;
16944         case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
16945                      name, nCC(INSN_COND), rD, dis_buf);
16946                  break;
16947         default: vassert(0);
16948      }
16949
16950      /* XXX deal with alignment constraints */
16951
16952      goto decode_success;
16953
16954      /* Complications:
16955
16956         For all loads: if the Amode specifies base register
16957         writeback, and the same register is specified for Rd and Rn,
16958         the results are UNPREDICTABLE.
16959
16960         For all loads and stores: if R15 is written, branch to
16961         that address afterwards.
16962
16963         Misaligned halfword stores => Unpredictable
16964         Misaligned halfword loads  => Unpredictable
16965      */
16966    }
16967
16968   after_load_store_sbyte_or_hword:
16969
16970    /* --------------------- Load/store multiple -------------- */
16971    // LD/STMIA LD/STMIB LD/STMDA LD/STMDB
16972    // Remarkably complex and difficult to get right
16973    // match 27:20 as 100XX0WL
16974    if (BITS8(1,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,1,0,0))) {
16975       // A5-50 LD/STMIA  cond 1000 10WL Rn RegList
16976       // A5-51 LD/STMIB  cond 1001 10WL Rn RegList
16977       // A5-53 LD/STMDA  cond 1000 00WL Rn RegList
16978       // A5-53 LD/STMDB  cond 1001 00WL Rn RegList
16979       //                   28   24   20 16       0
16980
16981       UInt bINC    = (insn >> 23) & 1;
16982       UInt bBEFORE = (insn >> 24) & 1;
16983
16984       UInt bL      = (insn >> 20) & 1;  /* load=1, store=0 */
16985       UInt bW      = (insn >> 21) & 1;  /* Rn wback=1, no wback=0 */
16986       UInt rN      = (insn >> 16) & 0xF;
16987       UInt regList = insn & 0xFFFF;
16988       /* Skip some invalid cases, which would lead to two competing
16989          updates to the same register, or which are otherwise
16990          disallowed by the spec.  Note the test above has required
16991          that S == 0, since that looks like a kernel-mode only thing.
16992          Done by forcing the real pattern, viz 100XXSWL to actually be
16993          100XX0WL. */
16994       if (rN == 15) goto after_load_store_multiple;
16995       // reglist can't be empty
16996       if (regList == 0) goto after_load_store_multiple;
16997       // if requested to writeback Rn, and this is a load instruction,
16998       // then Rn can't appear in RegList, since we'd have two competing
16999       // new values for Rn.  We do however accept this case for store
17000       // instructions.
17001       if (bW == 1 && bL == 1 && ((1 << rN) & regList) > 0)
17002          goto after_load_store_multiple;
17003
17004       /* Now, we can't do a conditional load or store, since that very
17005          likely will generate an exception.  So we have to take a side
17006          exit at this point if the condition is false. */
17007       if (condT != IRTemp_INVALID) {
17008          mk_skip_over_A32_if_cond_is_false( condT );
17009          condT = IRTemp_INVALID;
17010       }
17011
17012       /* Ok, now we're unconditional.  Generate the IR. */
17013       mk_ldm_stm( True/*arm*/, rN, bINC, bBEFORE, bW, bL, regList );
17014
17015       DIP("%sm%c%c%s r%u%s, {0x%04x}\n",
17016           bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
17017           nCC(INSN_COND),
17018           rN, bW ? "!" : "", regList);
17019
17020       goto decode_success;
17021    }
17022
17023   after_load_store_multiple:
17024
17025    /* --------------------- Control flow --------------------- */
17026    // B, BL (Branch, or Branch-and-Link, to immediate offset)
17027    //
17028    if (BITS8(1,0,1,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,0,0,0,0,0))) {
17029       UInt link   = (insn >> 24) & 1;
17030       UInt uimm24 = insn & ((1<<24)-1);  uimm24 <<= 8;
17031       Int  simm24 = (Int)uimm24;         simm24 >>= 8;
17032       UInt dst    = guest_R15_curr_instr_notENC + 8 + (((UInt)simm24) << 2);
17033       IRJumpKind jk = link ? Ijk_Call : Ijk_Boring;
17034       if (link) {
17035          putIRegA(14, mkU32(guest_R15_curr_instr_notENC + 4),
17036                       condT, Ijk_Boring);
17037       }
17038       if (condT == IRTemp_INVALID) {
17039          /* unconditional transfer to 'dst'.  See if we can simply
17040             continue tracing at the destination. */
17041          if (resteerOkFn( callback_opaque, dst )) {
17042             /* yes */
17043             dres.whatNext   = Dis_ResteerU;
17044             dres.continueAt = dst;
17045          } else {
17046             /* no; terminate the SB at this point. */
17047             llPutIReg(15, mkU32(dst));
17048             dres.jk_StopHere = jk;
17049             dres.whatNext    = Dis_StopHere;
17050          }
17051          DIP("b%s 0x%x\n", link ? "l" : "", dst);
17052       } else {
17053          /* conditional transfer to 'dst' */
17054          const HChar* comment = "";
17055
17056          /* First see if we can do some speculative chasing into one
17057             arm or the other.  Be conservative and only chase if
17058             !link, that is, this is a normal conditional branch to a
17059             known destination. */
17060          if (!link
17061              && resteerCisOk
17062              && vex_control.guest_chase_cond
17063              && dst < guest_R15_curr_instr_notENC
17064              && resteerOkFn( callback_opaque, dst) ) {
17065             /* Speculation: assume this backward branch is taken.  So
17066                we need to emit a side-exit to the insn following this
17067                one, on the negation of the condition, and continue at
17068                the branch target address (dst). */
17069             stmt( IRStmt_Exit( unop(Iop_Not1,
17070                                     unop(Iop_32to1, mkexpr(condT))),
17071                                Ijk_Boring,
17072                                IRConst_U32(guest_R15_curr_instr_notENC+4),
17073                                OFFB_R15T ));
17074             dres.whatNext   = Dis_ResteerC;
17075             dres.continueAt = (Addr32)dst;
17076             comment = "(assumed taken)";
17077          }
17078          else
17079          if (!link
17080              && resteerCisOk
17081              && vex_control.guest_chase_cond
17082              && dst >= guest_R15_curr_instr_notENC
17083              && resteerOkFn( callback_opaque,
17084                              guest_R15_curr_instr_notENC+4) ) {
17085             /* Speculation: assume this forward branch is not taken.
17086                So we need to emit a side-exit to dst (the dest) and
17087                continue disassembling at the insn immediately
17088                following this one. */
17089             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
17090                                Ijk_Boring,
17091                                IRConst_U32(dst),
17092                                OFFB_R15T ));
17093             dres.whatNext   = Dis_ResteerC;
17094             dres.continueAt = guest_R15_curr_instr_notENC+4;
17095             comment = "(assumed not taken)";
17096          }
17097          else {
17098             /* Conservative default translation - end the block at
17099                this point. */
17100             stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
17101                                jk, IRConst_U32(dst), OFFB_R15T ));
17102             llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
17103             dres.jk_StopHere = Ijk_Boring;
17104             dres.whatNext    = Dis_StopHere;
17105          }
17106          DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
17107              dst, comment);
17108       }
17109       goto decode_success;
17110    }
17111
17112    // B, BL (Branch, or Branch-and-Link, to a register)
17113    // NB: interworking branch
17114    if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
17115        && INSN(19,12) == BITS8(1,1,1,1,1,1,1,1)
17116        && (INSN(11,4) == BITS8(1,1,1,1,0,0,1,1)
17117            || INSN(11,4) == BITS8(1,1,1,1,0,0,0,1))) {
17118       IRTemp  dst = newTemp(Ity_I32);
17119       UInt    link = (INSN(11,4) >> 1) & 1;
17120       UInt    rM   = INSN(3,0);
17121       // we don't decode the case (link && rM == 15), as that's
17122       // Unpredictable.
17123       if (!(link && rM == 15)) {
17124          if (condT != IRTemp_INVALID) {
17125             mk_skip_over_A32_if_cond_is_false( condT );
17126          }
17127          // rM contains an interworking address exactly as we require
17128          // (with continuation CPSR.T in bit 0), so we can use it
17129          // as-is, with no masking.
17130          assign( dst, getIRegA(rM) );
17131          if (link) {
17132             putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
17133                       IRTemp_INVALID/*because AL*/, Ijk_Boring );
17134          }
17135          llPutIReg(15, mkexpr(dst));
17136          dres.jk_StopHere = link ? Ijk_Call
17137                                  : (rM == 14 ? Ijk_Ret : Ijk_Boring);
17138          dres.whatNext    = Dis_StopHere;
17139          if (condT == IRTemp_INVALID) {
17140             DIP("b%sx r%u\n", link ? "l" : "", rM);
17141          } else {
17142             DIP("b%sx%s r%u\n", link ? "l" : "", nCC(INSN_COND), rM);
17143          }
17144          goto decode_success;
17145       }
17146       /* else: (link && rM == 15): just fall through */
17147    }
17148
17149    /* --- NB: ARM interworking branches are in NV space, hence
17150       are handled elsewhere by decode_NV_instruction_ARMv7_and_below.
17151       ---
17152    */
17153
17154    /* --------------------- Clz --------------------- */
17155    // CLZ
17156    if (INSN(27,20) == BITS8(0,0,0,1,0,1,1,0)
17157        && INSN(19,16) == BITS4(1,1,1,1)
17158        && INSN(11,4) == BITS8(1,1,1,1,0,0,0,1)) {
17159       UInt rD = INSN(15,12);
17160       UInt rM = INSN(3,0);
17161       IRTemp arg = newTemp(Ity_I32);
17162       IRTemp res = newTemp(Ity_I32);
17163       assign(arg, getIRegA(rM));
17164       assign(res, IRExpr_ITE(
17165                      binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
17166                      mkU32(32),
17167                      unop(Iop_Clz32, mkexpr(arg))
17168             ));
17169       putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17170       DIP("clz%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
17171       goto decode_success;
17172    }
17173
17174    /* --------------------- Mul etc --------------------- */
17175    // MUL
17176    if (BITS8(0,0,0,0,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
17177        && INSN(15,12) == BITS4(0,0,0,0)
17178        && INSN(7,4) == BITS4(1,0,0,1)) {
17179       UInt bitS = (insn >> 20) & 1; /* 20:20 */
17180       UInt rD = INSN(19,16);
17181       UInt rS = INSN(11,8);
17182       UInt rM = INSN(3,0);
17183       if (rD == 15 || rM == 15 || rS == 15) {
17184          /* Unpredictable; don't decode; fall through */
17185       } else {
17186          IRTemp argL = newTemp(Ity_I32);
17187          IRTemp argR = newTemp(Ity_I32);
17188          IRTemp res  = newTemp(Ity_I32);
17189          IRTemp oldC = IRTemp_INVALID;
17190          IRTemp oldV = IRTemp_INVALID;
17191          assign( argL, getIRegA(rM));
17192          assign( argR, getIRegA(rS));
17193          assign( res, binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) );
17194          if (bitS) {
17195             oldC = newTemp(Ity_I32);
17196             assign(oldC, mk_armg_calculate_flag_c());
17197             oldV = newTemp(Ity_I32);
17198             assign(oldV, mk_armg_calculate_flag_v());
17199          }
17200          // now update guest state
17201          putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
17202          if (bitS) {
17203             IRTemp pair = newTemp(Ity_I32);
17204             assign( pair, binop(Iop_Or32,
17205                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17206                                 mkexpr(oldV)) );
17207             setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
17208          }
17209          DIP("mul%c%s r%u, r%u, r%u\n",
17210              bitS ? 's' : ' ', nCC(INSN_COND), rD, rM, rS);
17211          goto decode_success;
17212       }
17213       /* fall through */
17214    }
17215
17216    /* --------------------- Integer Divides --------------------- */
17217    // SDIV
17218    if (BITS8(0,1,1,1,0,0,0,1) == INSN(27,20)
17219        && INSN(15,12) == BITS4(1,1,1,1)
17220        && INSN(7,4) == BITS4(0,0,0,1)) {
17221       UInt rD = INSN(19,16);
17222       UInt rM = INSN(11,8);
17223       UInt rN = INSN(3,0);
17224       if (rD == 15 || rM == 15 || rN == 15) {
17225          /* Unpredictable; don't decode; fall through */
17226       } else {
17227          IRTemp res  = newTemp(Ity_I32);
17228          IRTemp argL = newTemp(Ity_I32);
17229          IRTemp argR = newTemp(Ity_I32);
17230          assign(argL, getIRegA(rN));
17231          assign(argR, getIRegA(rM));
17232          assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
17233          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17234          DIP("sdiv r%u, r%u, r%u\n", rD, rN, rM);
17235          goto decode_success;
17236       }
17237     }
17238
17239    // UDIV
17240    if (BITS8(0,1,1,1,0,0,1,1) == INSN(27,20)
17241        && INSN(15,12) == BITS4(1,1,1,1)
17242        && INSN(7,4) == BITS4(0,0,0,1)) {
17243       UInt rD = INSN(19,16);
17244       UInt rM = INSN(11,8);
17245       UInt rN = INSN(3,0);
17246       if (rD == 15 || rM == 15 || rN == 15) {
17247          /* Unpredictable; don't decode; fall through */
17248       } else {
17249          IRTemp res  = newTemp(Ity_I32);
17250          IRTemp argL = newTemp(Ity_I32);
17251          IRTemp argR = newTemp(Ity_I32);
17252          assign(argL, getIRegA(rN));
17253          assign(argR, getIRegA(rM));
17254          assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
17255          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17256          DIP("udiv r%u, r%u, r%u\n", rD, rN, rM);
17257          goto decode_success;
17258       }
17259    }
17260
17261    // MLA, MLS
17262    if (BITS8(0,0,0,0,0,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17263        && INSN(7,4) == BITS4(1,0,0,1)) {
17264       UInt bitS  = (insn >> 20) & 1; /* 20:20 */
17265       UInt isMLS = (insn >> 22) & 1; /* 22:22 */
17266       UInt rD = INSN(19,16);
17267       UInt rN = INSN(15,12);
17268       UInt rS = INSN(11,8);
17269       UInt rM = INSN(3,0);
17270       if (bitS == 1 && isMLS == 1) {
17271          /* This isn't allowed (MLS that sets flags).  don't decode;
17272             fall through */
17273       }
17274       else
17275       if (rD == 15 || rM == 15 || rS == 15 || rN == 15) {
17276          /* Unpredictable; don't decode; fall through */
17277       } else {
17278          IRTemp argL = newTemp(Ity_I32);
17279          IRTemp argR = newTemp(Ity_I32);
17280          IRTemp argP = newTemp(Ity_I32);
17281          IRTemp res  = newTemp(Ity_I32);
17282          IRTemp oldC = IRTemp_INVALID;
17283          IRTemp oldV = IRTemp_INVALID;
17284          assign( argL, getIRegA(rM));
17285          assign( argR, getIRegA(rS));
17286          assign( argP, getIRegA(rN));
17287          assign( res, binop(isMLS ? Iop_Sub32 : Iop_Add32,
17288                             mkexpr(argP),
17289                             binop(Iop_Mul32, mkexpr(argL), mkexpr(argR)) ));
17290          if (bitS) {
17291             vassert(!isMLS); // guaranteed above
17292             oldC = newTemp(Ity_I32);
17293             assign(oldC, mk_armg_calculate_flag_c());
17294             oldV = newTemp(Ity_I32);
17295             assign(oldV, mk_armg_calculate_flag_v());
17296          }
17297          // now update guest state
17298          putIRegA( rD, mkexpr(res), condT, Ijk_Boring );
17299          if (bitS) {
17300             IRTemp pair = newTemp(Ity_I32);
17301             assign( pair, binop(Iop_Or32,
17302                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17303                                 mkexpr(oldV)) );
17304             setFlags_D1_ND( ARMG_CC_OP_MUL, res, pair, condT );
17305          }
17306          DIP("ml%c%c%s r%u, r%u, r%u, r%u\n",
17307              isMLS ? 's' : 'a', bitS ? 's' : ' ',
17308              nCC(INSN_COND), rD, rM, rS, rN);
17309          goto decode_success;
17310       }
17311       /* fall through */
17312    }
17313
17314    // SMULL, UMULL
17315    if (BITS8(0,0,0,0,1,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17316        && INSN(7,4) == BITS4(1,0,0,1)) {
17317       UInt bitS = (insn >> 20) & 1; /* 20:20 */
17318       UInt rDhi = INSN(19,16);
17319       UInt rDlo = INSN(15,12);
17320       UInt rS   = INSN(11,8);
17321       UInt rM   = INSN(3,0);
17322       UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
17323       if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
17324          /* Unpredictable; don't decode; fall through */
17325       } else {
17326          IRTemp argL  = newTemp(Ity_I32);
17327          IRTemp argR  = newTemp(Ity_I32);
17328          IRTemp res   = newTemp(Ity_I64);
17329          IRTemp resHi = newTemp(Ity_I32);
17330          IRTemp resLo = newTemp(Ity_I32);
17331          IRTemp oldC  = IRTemp_INVALID;
17332          IRTemp oldV  = IRTemp_INVALID;
17333          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17334          assign( argL, getIRegA(rM));
17335          assign( argR, getIRegA(rS));
17336          assign( res, binop(mulOp, mkexpr(argL), mkexpr(argR)) );
17337          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17338          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17339          if (bitS) {
17340             oldC = newTemp(Ity_I32);
17341             assign(oldC, mk_armg_calculate_flag_c());
17342             oldV = newTemp(Ity_I32);
17343             assign(oldV, mk_armg_calculate_flag_v());
17344          }
17345          // now update guest state
17346          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17347          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17348          if (bitS) {
17349             IRTemp pair = newTemp(Ity_I32);
17350             assign( pair, binop(Iop_Or32,
17351                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17352                                 mkexpr(oldV)) );
17353             setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
17354          }
17355          DIP("%cmull%c%s r%u, r%u, r%u, r%u\n",
17356              isS ? 's' : 'u', bitS ? 's' : ' ',
17357              nCC(INSN_COND), rDlo, rDhi, rM, rS);
17358          goto decode_success;
17359       }
17360       /* fall through */
17361    }
17362
17363    // SMLAL, UMLAL
17364    if (BITS8(0,0,0,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17365        && INSN(7,4) == BITS4(1,0,0,1)) {
17366       UInt bitS = (insn >> 20) & 1; /* 20:20 */
17367       UInt rDhi = INSN(19,16);
17368       UInt rDlo = INSN(15,12);
17369       UInt rS   = INSN(11,8);
17370       UInt rM   = INSN(3,0);
17371       UInt isS  = (INSN(27,20) >> 2) & 1; /* 22:22 */
17372       if (rDhi == 15 || rDlo == 15 || rM == 15 || rS == 15 || rDhi == rDlo)  {
17373          /* Unpredictable; don't decode; fall through */
17374       } else {
17375          IRTemp argL  = newTemp(Ity_I32);
17376          IRTemp argR  = newTemp(Ity_I32);
17377          IRTemp old   = newTemp(Ity_I64);
17378          IRTemp res   = newTemp(Ity_I64);
17379          IRTemp resHi = newTemp(Ity_I32);
17380          IRTemp resLo = newTemp(Ity_I32);
17381          IRTemp oldC  = IRTemp_INVALID;
17382          IRTemp oldV  = IRTemp_INVALID;
17383          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
17384          assign( argL, getIRegA(rM));
17385          assign( argR, getIRegA(rS));
17386          assign( old, binop(Iop_32HLto64, getIRegA(rDhi), getIRegA(rDlo)) );
17387          assign( res, binop(Iop_Add64,
17388                             mkexpr(old),
17389                             binop(mulOp, mkexpr(argL), mkexpr(argR))) );
17390          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17391          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17392          if (bitS) {
17393             oldC = newTemp(Ity_I32);
17394             assign(oldC, mk_armg_calculate_flag_c());
17395             oldV = newTemp(Ity_I32);
17396             assign(oldV, mk_armg_calculate_flag_v());
17397          }
17398          // now update guest state
17399          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17400          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17401          if (bitS) {
17402             IRTemp pair = newTemp(Ity_I32);
17403             assign( pair, binop(Iop_Or32,
17404                                 binop(Iop_Shl32, mkexpr(oldC), mkU8(1)),
17405                                 mkexpr(oldV)) );
17406             setFlags_D1_D2_ND( ARMG_CC_OP_MULL, resLo, resHi, pair, condT );
17407          }
17408          DIP("%cmlal%c%s r%u, r%u, r%u, r%u\n",
17409              isS ? 's' : 'u', bitS ? 's' : ' ', nCC(INSN_COND),
17410              rDlo, rDhi, rM, rS);
17411          goto decode_success;
17412       }
17413       /* fall through */
17414    }
17415
17416    // UMAAL
17417    if (BITS8(0,0,0,0,0,1,0,0) == INSN(27,20) && INSN(7,4) == BITS4(1,0,0,1)) {
17418       UInt rDhi = INSN(19,16);
17419       UInt rDlo = INSN(15,12);
17420       UInt rM   = INSN(11,8);
17421       UInt rN   = INSN(3,0);
17422       if (rDlo == 15 || rDhi == 15 || rN == 15 || rM == 15 || rDhi == rDlo)  {
17423          /* Unpredictable; don't decode; fall through */
17424       } else {
17425          IRTemp argN   = newTemp(Ity_I32);
17426          IRTemp argM   = newTemp(Ity_I32);
17427          IRTemp argDhi = newTemp(Ity_I32);
17428          IRTemp argDlo = newTemp(Ity_I32);
17429          IRTemp res    = newTemp(Ity_I64);
17430          IRTemp resHi  = newTemp(Ity_I32);
17431          IRTemp resLo  = newTemp(Ity_I32);
17432          assign( argN,   getIRegA(rN) );
17433          assign( argM,   getIRegA(rM) );
17434          assign( argDhi, getIRegA(rDhi) );
17435          assign( argDlo, getIRegA(rDlo) );
17436          assign( res,
17437                  binop(Iop_Add64,
17438                        binop(Iop_Add64,
17439                              binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
17440                              unop(Iop_32Uto64, mkexpr(argDhi))),
17441                        unop(Iop_32Uto64, mkexpr(argDlo))) );
17442          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
17443          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
17444          // now update guest state
17445          putIRegA( rDhi, mkexpr(resHi), condT, Ijk_Boring );
17446          putIRegA( rDlo, mkexpr(resLo), condT, Ijk_Boring );
17447          DIP("umaal %s r%u, r%u, r%u, r%u\n",
17448              nCC(INSN_COND), rDlo, rDhi, rN, rM);
17449          goto decode_success;
17450       }
17451       /* fall through */
17452    }
17453
17454    /* --------------------- Msr etc --------------------- */
17455
17456    // MSR apsr, #imm
17457    if (INSN(27,20) == BITS8(0,0,1,1,0,0,1,0)
17458        && INSN(17,12) == BITS6(0,0,1,1,1,1)) {
17459       UInt write_ge    = INSN(18,18);
17460       UInt write_nzcvq = INSN(19,19);
17461       if (write_nzcvq || write_ge) {
17462          UInt   imm = (INSN(11,0) >> 0) & 0xFF;
17463          UInt   rot = 2 * ((INSN(11,0) >> 8) & 0xF);
17464          IRTemp immT = newTemp(Ity_I32);
17465          vassert(rot <= 30);
17466          imm = ROR32(imm, rot);
17467          assign(immT, mkU32(imm));
17468          desynthesise_APSR( write_nzcvq, write_ge, immT, condT );
17469          DIP("msr%s cpsr%s%sf, #0x%08x\n", nCC(INSN_COND),
17470              write_nzcvq ? "f" : "", write_ge ? "g" : "", imm);
17471          goto decode_success;
17472       }
17473       /* fall through */
17474    }
17475
17476    // MSR apsr, reg
17477    if (INSN(27,20) == BITS8(0,0,0,1,0,0,1,0)
17478        && INSN(17,12) == BITS6(0,0,1,1,1,1)
17479        && INSN(11,4) == BITS8(0,0,0,0,0,0,0,0)) {
17480       UInt rN          = INSN(3,0);
17481       UInt write_ge    = INSN(18,18);
17482       UInt write_nzcvq = INSN(19,19);
17483       if (rN != 15 && (write_nzcvq || write_ge)) {
17484          IRTemp rNt = newTemp(Ity_I32);
17485          assign(rNt, getIRegA(rN));
17486          desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
17487          DIP("msr%s cpsr_%s%s, r%u\n", nCC(INSN_COND),
17488              write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
17489          goto decode_success;
17490       }
17491       /* fall through */
17492    }
17493
17494    // MRS rD, cpsr
17495    if ((insn & 0x0FFF0FFF) == 0x010F0000) {
17496       UInt rD   = INSN(15,12);
17497       if (rD != 15) {
17498          IRTemp apsr = synthesise_APSR();
17499          putIRegA( rD, mkexpr(apsr), condT, Ijk_Boring );
17500          DIP("mrs%s r%u, cpsr\n", nCC(INSN_COND), rD);
17501          goto decode_success;
17502       }
17503       /* fall through */
17504    }
17505
17506    /* --------------------- Svc --------------------- */
17507    if (BITS8(1,1,1,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,0,0,0,0))) {
17508       UInt imm24 = (insn >> 0) & 0xFFFFFF;
17509       if (imm24 == 0) {
17510          /* A syscall.  We can't do this conditionally, hence: */
17511          if (condT != IRTemp_INVALID) {
17512             mk_skip_over_A32_if_cond_is_false( condT );
17513          }
17514          // AL after here
17515          llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
17516          dres.jk_StopHere = Ijk_Sys_syscall;
17517          dres.whatNext    = Dis_StopHere;
17518          DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
17519          goto decode_success;
17520       }
17521       /* fall through */
17522    }
17523
17524    /* ------------------------ swp ------------------------ */
17525
17526    // SWP, SWPB
17527    if (BITS8(0,0,0,1,0,0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
17528        && BITS4(0,0,0,0) == INSN(11,8)
17529        && BITS4(1,0,0,1) == INSN(7,4)) {
17530       UInt   rN   = INSN(19,16);
17531       UInt   rD   = INSN(15,12);
17532       UInt   rM   = INSN(3,0);
17533       IRTemp tRn  = newTemp(Ity_I32);
17534       IRTemp tNew = newTemp(Ity_I32);
17535       IRTemp tOld = IRTemp_INVALID;
17536       IRTemp tSC1 = newTemp(Ity_I1);
17537       UInt   isB  = (insn >> 22) & 1;
17538
17539       if (rD == 15 || rN == 15 || rM == 15 || rN == rM || rN == rD) {
17540          /* undecodable; fall through */
17541       } else {
17542          /* make unconditional */
17543          if (condT != IRTemp_INVALID) {
17544             mk_skip_over_A32_if_cond_is_false( condT );
17545             condT = IRTemp_INVALID;
17546          }
17547          /* Ok, now we're unconditional.  Generate a LL-SC loop. */
17548          assign(tRn, getIRegA(rN));
17549          assign(tNew, getIRegA(rM));
17550          if (isB) {
17551             /* swpb */
17552             tOld = newTemp(Ity_I8);
17553             stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
17554                               NULL/*=>isLL*/) );
17555             stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
17556                               unop(Iop_32to8, mkexpr(tNew))) );
17557          } else {
17558             /* swp */
17559             tOld = newTemp(Ity_I32);
17560             stmt( IRStmt_LLSC(Iend_LE, tOld, mkexpr(tRn),
17561                               NULL/*=>isLL*/) );
17562             stmt( IRStmt_LLSC(Iend_LE, tSC1, mkexpr(tRn),
17563                               mkexpr(tNew)) );
17564          }
17565          stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
17566                            /*Ijk_NoRedir*/Ijk_Boring,
17567                            IRConst_U32(guest_R15_curr_instr_notENC),
17568                            OFFB_R15T ));
17569          putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
17570                       IRTemp_INVALID, Ijk_Boring);
17571          DIP("swp%s%s r%u, r%u, [r%u]\n",
17572              isB ? "b" : "", nCC(INSN_COND), rD, rM, rN);
17573          goto decode_success;
17574       }
17575       /* fall through */
17576    }
17577
17578    /* ----------------------------------------------------------- */
17579    /* -- ARMv6 instructions                                    -- */
17580    /* ----------------------------------------------------------- */
17581
17582    /* ------------------- {ldr,str}ex{,b,h,d} ------------------- */
17583
17584    // LDREXD, LDREX, LDREXH, LDREXB
17585    if (0x01900F9F == (insn & 0x0F900FFF)) {
17586       UInt   rT    = INSN(15,12);
17587       UInt   rN    = INSN(19,16);
17588       IRType ty    = Ity_INVALID;
17589       IROp   widen = Iop_INVALID;
17590       const HChar* nm = NULL;
17591       Bool   valid = True;
17592       switch (INSN(22,21)) {
17593          case 0: nm = "";  ty = Ity_I32; break;
17594          case 1: nm = "d"; ty = Ity_I64; break;
17595          case 2: nm = "b"; ty = Ity_I8;  widen = Iop_8Uto32; break;
17596          case 3: nm = "h"; ty = Ity_I16; widen = Iop_16Uto32; break;
17597          default: vassert(0);
17598       }
17599       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
17600          if (rT == 15 || rN == 15)
17601             valid = False;
17602       } else {
17603          vassert(ty == Ity_I64);
17604          if ((rT & 1) == 1 || rT == 14 || rN == 15)
17605             valid = False;
17606       }
17607       if (valid) {
17608          IRTemp res;
17609          /* make unconditional */
17610          if (condT != IRTemp_INVALID) {
17611            mk_skip_over_A32_if_cond_is_false( condT );
17612            condT = IRTemp_INVALID;
17613          }
17614          /* Ok, now we're unconditional.  Do the load. */
17615          res = newTemp(ty);
17616          // FIXME: assumes little-endian guest
17617          stmt( IRStmt_LLSC(Iend_LE, res, getIRegA(rN),
17618                            NULL/*this is a load*/) );
17619          if (ty == Ity_I64) {
17620             // FIXME: assumes little-endian guest
17621             putIRegA(rT+0, unop(Iop_64to32, mkexpr(res)),
17622                            IRTemp_INVALID, Ijk_Boring);
17623             putIRegA(rT+1, unop(Iop_64HIto32, mkexpr(res)),
17624                            IRTemp_INVALID, Ijk_Boring);
17625             DIP("ldrex%s%s r%u, r%u, [r%u]\n",
17626                 nm, nCC(INSN_COND), rT+0, rT+1, rN);
17627          } else {
17628             putIRegA(rT, widen == Iop_INVALID
17629                             ? mkexpr(res) : unop(widen, mkexpr(res)),
17630                      IRTemp_INVALID, Ijk_Boring);
17631             DIP("ldrex%s%s r%u, [r%u]\n", nm, nCC(INSN_COND), rT, rN);
17632          }
17633          goto decode_success;
17634       }
17635       /* undecodable; fall through */
17636    }
17637
17638    // STREXD, STREX, STREXH, STREXB
17639    if (0x01800F90 == (insn & 0x0F900FF0)) {
17640       UInt   rT     = INSN(3,0);
17641       UInt   rN     = INSN(19,16);
17642       UInt   rD     = INSN(15,12);
17643       IRType ty     = Ity_INVALID;
17644       IROp   narrow = Iop_INVALID;
17645       const HChar* nm = NULL;
17646       Bool   valid  = True;
17647       switch (INSN(22,21)) {
17648          case 0: nm = "";  ty = Ity_I32; break;
17649          case 1: nm = "d"; ty = Ity_I64; break;
17650          case 2: nm = "b"; ty = Ity_I8;  narrow = Iop_32to8; break;
17651          case 3: nm = "h"; ty = Ity_I16; narrow = Iop_32to16; break;
17652          default: vassert(0);
17653       }
17654       if (ty == Ity_I32 || ty == Ity_I16 || ty == Ity_I8) {
17655          if (rD == 15 || rN == 15 || rT == 15
17656              || rD == rN || rD == rT)
17657             valid = False;
17658       } else {
17659          vassert(ty == Ity_I64);
17660          if (rD == 15 || (rT & 1) == 1 || rT == 14 || rN == 15
17661              || rD == rN || rD == rT || rD == rT+1)
17662             valid = False;
17663       }
17664       if (valid) {
17665          IRTemp resSC1, resSC32, data;
17666          /* make unconditional */
17667          if (condT != IRTemp_INVALID) {
17668             mk_skip_over_A32_if_cond_is_false( condT );
17669             condT = IRTemp_INVALID;
17670          }
17671          /* Ok, now we're unconditional.  Do the store. */
17672          data = newTemp(ty);
17673          assign(data,
17674                 ty == Ity_I64
17675                    // FIXME: assumes little-endian guest
17676                    ? binop(Iop_32HLto64, getIRegA(rT+1), getIRegA(rT+0))
17677                    : narrow == Iop_INVALID
17678                       ? getIRegA(rT)
17679                       : unop(narrow, getIRegA(rT)));
17680          resSC1 = newTemp(Ity_I1);
17681          // FIXME: assumes little-endian guest
17682          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegA(rN), mkexpr(data)) );
17683
17684          /* Set rD to 1 on failure, 0 on success.  Currently we have
17685             resSC1 == 0 on failure, 1 on success. */
17686          resSC32 = newTemp(Ity_I32);
17687          assign(resSC32,
17688                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
17689
17690          putIRegA(rD, mkexpr(resSC32),
17691                       IRTemp_INVALID, Ijk_Boring);
17692          if (ty == Ity_I64) {
17693             DIP("strex%s%s r%u, r%u, r%u, [r%u]\n",
17694                 nm, nCC(INSN_COND), rD, rT, rT+1, rN);
17695          } else {
17696             DIP("strex%s%s r%u, r%u, [r%u]\n",
17697                 nm, nCC(INSN_COND), rD, rT, rN);
17698          }
17699          goto decode_success;
17700       }
17701       /* fall through */
17702    }
17703
17704    /* --------------------- movw, movt --------------------- */
17705    if (0x03000000 == (insn & 0x0FF00000)
17706        || 0x03400000 == (insn & 0x0FF00000)) /* pray for CSE */ {
17707       UInt rD    = INSN(15,12);
17708       UInt imm16 = (insn & 0xFFF) | ((insn >> 4) & 0x0000F000);
17709       UInt isT   = (insn >> 22) & 1;
17710       if (rD == 15) {
17711          /* forget it */
17712       } else {
17713          if (isT) {
17714             putIRegA(rD,
17715                      binop(Iop_Or32,
17716                            binop(Iop_And32, getIRegA(rD), mkU32(0xFFFF)),
17717                            mkU32(imm16 << 16)),
17718                      condT, Ijk_Boring);
17719             DIP("movt%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
17720             goto decode_success;
17721          } else {
17722             putIRegA(rD, mkU32(imm16), condT, Ijk_Boring);
17723             DIP("movw%s r%u, #0x%04x\n", nCC(INSN_COND), rD, imm16);
17724             goto decode_success;
17725          }
17726       }
17727       /* fall through */
17728    }
17729
17730    /* ----------- uxtb, sxtb, uxth, sxth, uxtb16, sxtb16 ----------- */
17731    /* FIXME: this is an exact duplicate of the Thumb version.  They
17732       should be commoned up. */
17733    if (BITS8(0,1,1,0,1, 0,0,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,0,0))
17734        && BITS4(1,1,1,1) == INSN(19,16)
17735        && BITS4(0,1,1,1) == INSN(7,4)
17736        && BITS4(0,0, 0,0) == (INSN(11,8) & BITS4(0,0,1,1))) {
17737       UInt subopc = INSN(27,20) & BITS8(0,0,0,0,0, 1,1,1);
17738       if (subopc != BITS4(0,0,0,1) && subopc != BITS4(0,1,0,1)) {
17739          Int    rot  = (INSN(11,8) >> 2) & 3;
17740          UInt   rM   = INSN(3,0);
17741          UInt   rD   = INSN(15,12);
17742          IRTemp srcT = newTemp(Ity_I32);
17743          IRTemp rotT = newTemp(Ity_I32);
17744          IRTemp dstT = newTemp(Ity_I32);
17745          const HChar* nm = "???";
17746          assign(srcT, getIRegA(rM));
17747          assign(rotT, genROR32(srcT, 8 * rot)); /* 0, 8, 16 or 24 only */
17748          switch (subopc) {
17749             case BITS4(0,1,1,0): // UXTB
17750                assign(dstT, unop(Iop_8Uto32, unop(Iop_32to8, mkexpr(rotT))));
17751                nm = "uxtb";
17752                break;
17753             case BITS4(0,0,1,0): // SXTB
17754                assign(dstT, unop(Iop_8Sto32, unop(Iop_32to8, mkexpr(rotT))));
17755                nm = "sxtb";
17756                break;
17757             case BITS4(0,1,1,1): // UXTH
17758                assign(dstT, unop(Iop_16Uto32, unop(Iop_32to16, mkexpr(rotT))));
17759                nm = "uxth";
17760                break;
17761             case BITS4(0,0,1,1): // SXTH
17762                assign(dstT, unop(Iop_16Sto32, unop(Iop_32to16, mkexpr(rotT))));
17763                nm = "sxth";
17764                break;
17765             case BITS4(0,1,0,0): // UXTB16
17766                assign(dstT, binop(Iop_And32, mkexpr(rotT), mkU32(0x00FF00FF)));
17767                nm = "uxtb16";
17768                break;
17769             case BITS4(0,0,0,0): { // SXTB16
17770                IRTemp lo32 = newTemp(Ity_I32);
17771                IRTemp hi32 = newTemp(Ity_I32);
17772                assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
17773                assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
17774                assign(
17775                   dstT,
17776                   binop(Iop_Or32,
17777                         binop(Iop_And32,
17778                               unop(Iop_8Sto32,
17779                                    unop(Iop_32to8, mkexpr(lo32))),
17780                               mkU32(0xFFFF)),
17781                         binop(Iop_Shl32,
17782                               unop(Iop_8Sto32,
17783                                    unop(Iop_32to8, mkexpr(hi32))),
17784                               mkU8(16))
17785                ));
17786                nm = "sxtb16";
17787                break;
17788             }
17789             default:
17790                vassert(0); // guarded by "if" above
17791          }
17792          putIRegA(rD, mkexpr(dstT), condT, Ijk_Boring);
17793          DIP("%s%s r%u, r%u, ROR #%d\n", nm, nCC(INSN_COND), rD, rM, rot);
17794          goto decode_success;
17795       }
17796       /* fall through */
17797    }
17798
17799    /* ------------------- bfi, bfc ------------------- */
17800    if (BITS8(0,1,1,1,1,1,0, 0) == (INSN(27,20) & BITS8(1,1,1,1,1,1,1,0))
17801        && BITS4(0, 0,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
17802       UInt rD  = INSN(15,12);
17803       UInt rN  = INSN(3,0);
17804       UInt msb = (insn >> 16) & 0x1F; /* 20:16 */
17805       UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
17806       if (rD == 15 || msb < lsb) {
17807          /* undecodable; fall through */
17808       } else {
17809          IRTemp src    = newTemp(Ity_I32);
17810          IRTemp olddst = newTemp(Ity_I32);
17811          IRTemp newdst = newTemp(Ity_I32);
17812          UInt   mask   = ((UInt)1) << (msb - lsb);
17813          mask = (mask - 1) + mask;
17814          vassert(mask != 0); // guaranteed by "msb < lsb" check above
17815          mask <<= lsb;
17816
17817          assign(src, rN == 15 ? mkU32(0) : getIRegA(rN));
17818          assign(olddst, getIRegA(rD));
17819          assign(newdst,
17820                 binop(Iop_Or32,
17821                    binop(Iop_And32,
17822                          binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
17823                          mkU32(mask)),
17824                    binop(Iop_And32,
17825                          mkexpr(olddst),
17826                          mkU32(~mask)))
17827                );
17828
17829          putIRegA(rD, mkexpr(newdst), condT, Ijk_Boring);
17830
17831          if (rN == 15) {
17832             DIP("bfc%s r%u, #%u, #%u\n",
17833                 nCC(INSN_COND), rD, lsb, msb-lsb+1);
17834          } else {
17835             DIP("bfi%s r%u, r%u, #%u, #%u\n",
17836                 nCC(INSN_COND), rD, rN, lsb, msb-lsb+1);
17837          }
17838          goto decode_success;
17839       }
17840       /* fall through */
17841    }
17842
17843    /* ------------------- {u,s}bfx ------------------- */
17844    if (BITS8(0,1,1,1,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,0))
17845        && BITS4(0,1,0,1) == (INSN(7,4) & BITS4(0,1,1,1))) {
17846       UInt rD  = INSN(15,12);
17847       UInt rN  = INSN(3,0);
17848       UInt wm1 = (insn >> 16) & 0x1F; /* 20:16 */
17849       UInt lsb = (insn >> 7) & 0x1F;  /* 11:7 */
17850       UInt msb = lsb + wm1;
17851       UInt isU = (insn >> 22) & 1;    /* 22:22 */
17852       if (rD == 15 || rN == 15 || msb >= 32) {
17853          /* undecodable; fall through */
17854       } else {
17855          IRTemp src  = newTemp(Ity_I32);
17856          IRTemp tmp  = newTemp(Ity_I32);
17857          IRTemp res  = newTemp(Ity_I32);
17858          UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
17859          vassert(msb >= 0 && msb <= 31);
17860          vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
17861
17862          assign(src, getIRegA(rN));
17863          assign(tmp, binop(Iop_And32,
17864                            binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
17865                            mkU32(mask)));
17866          assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
17867                            binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
17868                            mkU8(31-wm1)));
17869
17870          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
17871
17872          DIP("%s%s r%u, r%u, #%u, #%u\n",
17873              isU ? "ubfx" : "sbfx",
17874              nCC(INSN_COND), rD, rN, lsb, wm1 + 1);
17875          goto decode_success;
17876       }
17877       /* fall through */
17878    }
17879
17880    /* --------------------- Load/store doubleword ------------- */
17881    // LDRD STRD
17882    /*                 31   27   23   19 15 11   7    3     # highest bit
17883                         28   24   20 16 12    8    4    0
17884       A5-36   1 | 16  cond 0001 U100 Rn Rd im4h 11S1 im4l
17885       A5-38   1 | 32  cond 0001 U000 Rn Rd 0000 11S1 Rm
17886       A5-40   2 | 16  cond 0001 U110 Rn Rd im4h 11S1 im4l
17887       A5-42   2 | 32  cond 0001 U010 Rn Rd 0000 11S1 Rm
17888       A5-44   3 | 16  cond 0000 U100 Rn Rd im4h 11S1 im4l
17889       A5-46   3 | 32  cond 0000 U000 Rn Rd 0000 11S1 Rm
17890    */
17891    /* case coding:
17892              1   at-ea               (access at ea)
17893              2   at-ea-then-upd      (access at ea, then Rn = ea)
17894              3   at-Rn-then-upd      (access at Rn, then Rn = ea)
17895       ea coding
17896              16  Rn +/- imm8
17897              32  Rn +/- Rm
17898    */
17899    /* Quickly skip over all of this for hopefully most instructions */
17900    if ((INSN(27,24) & BITS4(1,1,1,0)) != BITS4(0,0,0,0))
17901       goto after_load_store_doubleword;
17902
17903    /* Check the "11S1" thing. */
17904    if ((INSN(7,4) & BITS4(1,1,0,1)) != BITS4(1,1,0,1))
17905       goto after_load_store_doubleword;
17906
17907    summary = 0;
17908
17909    /**/ if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,0,0)) {
17910       summary = 1 | 16;
17911    }
17912    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,0,0)) {
17913       summary = 1 | 32;
17914    }
17915    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(1,1,0)) {
17916       summary = 2 | 16;
17917    }
17918    else if (INSN(27,24) == BITS4(0,0,0,1) && INSN(22,20) == BITS3(0,1,0)) {
17919       summary = 2 | 32;
17920    }
17921    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(1,0,0)) {
17922       summary = 3 | 16;
17923    }
17924    else if (INSN(27,24) == BITS4(0,0,0,0) && INSN(22,20) == BITS3(0,0,0)) {
17925       summary = 3 | 32;
17926    }
17927    else goto after_load_store_doubleword;
17928
17929    { UInt rN   = (insn >> 16) & 0xF; /* 19:16 */
17930      UInt rD   = (insn >> 12) & 0xF; /* 15:12 */
17931      UInt rM   = (insn >> 0)  & 0xF; /*  3:0  */
17932      UInt bU   = (insn >> 23) & 1;   /* 23 U=1 offset+, U=0 offset- */
17933      UInt bS   = (insn >> 5) & 1;    /* S=1 store, S=0 load */
17934      UInt imm8 = ((insn >> 4) & 0xF0) | (insn & 0xF); /* 11:8, 3:0 */
17935
17936      /* Require rD to be an even numbered register */
17937      if ((rD & 1) != 0)
17938         goto after_load_store_doubleword;
17939
17940      /* Require 11:8 == 0 for Rn +/- Rm cases */
17941      if ((summary & 32) != 0 && (imm8 & 0xF0) != 0)
17942         goto after_load_store_doubleword;
17943
17944      /* Skip some invalid cases, which would lead to two competing
17945         updates to the same register, or which are otherwise
17946         disallowed by the spec. */
17947      switch (summary) {
17948         case 1 | 16:
17949            break;
17950         case 1 | 32:
17951            if (rM == 15) goto after_load_store_doubleword;
17952            break;
17953         case 2 | 16: case 3 | 16:
17954            if (rN == 15) goto after_load_store_doubleword;
17955            if (bS == 0 && (rN == rD || rN == rD+1))
17956               goto after_load_store_doubleword;
17957            break;
17958         case 2 | 32: case 3 | 32:
17959            if (rM == 15) goto after_load_store_doubleword;
17960            if (rN == 15) goto after_load_store_doubleword;
17961            if (rN == rM) goto after_load_store_doubleword;
17962            if (bS == 0 && (rN == rD || rN == rD+1))
17963               goto after_load_store_doubleword;
17964            break;
17965         default:
17966            vassert(0);
17967      }
17968
17969      /* If this is a branch, make it unconditional at this point.
17970         Doing conditional branches in-line is too complex (for
17971         now). */
17972      vassert((rD & 1) == 0); /* from tests above */
17973      if (bS == 0 && rD+1 == 15 && condT != IRTemp_INVALID) {
17974         // go uncond
17975         mk_skip_over_A32_if_cond_is_false( condT );
17976         condT = IRTemp_INVALID;
17977         // now uncond
17978      }
17979
17980      /* compute the effective address.  Bind it to a tmp since we
17981         may need to use it twice. */
17982      IRExpr* eaE = NULL;
17983      switch (summary & 0xF0) {
17984         case 16:
17985            eaE = mk_EA_reg_plusminus_imm8( rN, bU, imm8, dis_buf );
17986            break;
17987         case 32:
17988            eaE = mk_EA_reg_plusminus_reg( rN, bU, rM, dis_buf );
17989            break;
17990      }
17991      vassert(eaE);
17992      IRTemp eaT = newTemp(Ity_I32);
17993      assign(eaT, eaE);
17994
17995      /* get the old Rn value */
17996      IRTemp rnT = newTemp(Ity_I32);
17997      assign(rnT, getIRegA(rN));
17998
17999      /* decide on the transfer address */
18000      IRTemp taT = IRTemp_INVALID;
18001      switch (summary & 0x0F) {
18002         case 1: case 2: taT = eaT; break;
18003         case 3:         taT = rnT; break;
18004      }
18005      vassert(taT != IRTemp_INVALID);
18006
18007      /* XXX deal with alignment constraints */
18008      /* XXX: but the A8 doesn't seem to trap for misaligned loads, so,
18009         ignore alignment issues for the time being. */
18010
18011      /* For almost all cases, we do the writeback after the transfers.
18012         However, that leaves the stack "uncovered" in cases like:
18013            strd    rD, [sp, #-8]
18014            strd    rD, [sp, #-16]
18015         In which case, do the writeback to SP now, instead of later.
18016         This is bad in that it makes the insn non-restartable if the
18017         accesses fault, but at least keeps Memcheck happy. */
18018      Bool writeback_already_done = False;
18019      if (bS == 1 /*store*/ && summary == (2 | 16)
18020          && rN == 13 && rN != rD && rN != rD+1
18021          && bU == 0/*minus*/
18022          && (imm8 == 8 || imm8 == 16)) {
18023         putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
18024         writeback_already_done = True;
18025      }
18026
18027      /* doubleword store  S 1
18028         doubleword load   S 0
18029      */
18030      const HChar* name = NULL;
18031      /* generate the transfers */
18032      if (bS == 1) { // doubleword store
18033         storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(0)),
18034                         getIRegA(rD+0), condT );
18035         storeGuardedLE( binop(Iop_Add32, mkexpr(taT), mkU32(4)),
18036                         getIRegA(rD+1), condT );
18037         name = "strd";
18038      } else { // doubleword load
18039         IRTemp oldRd0 = newTemp(Ity_I32);
18040         IRTemp oldRd1 = newTemp(Ity_I32);
18041         assign(oldRd0, llGetIReg(rD+0));
18042         assign(oldRd1, llGetIReg(rD+1));
18043         IRTemp newRd0 = newTemp(Ity_I32);
18044         IRTemp newRd1 = newTemp(Ity_I32);
18045         loadGuardedLE( newRd0, ILGop_Ident32,
18046                        binop(Iop_Add32, mkexpr(taT), mkU32(0)),
18047                        mkexpr(oldRd0), condT );
18048         putIRegA( rD+0, mkexpr(newRd0), IRTemp_INVALID, Ijk_Boring );
18049         loadGuardedLE( newRd1, ILGop_Ident32,
18050                        binop(Iop_Add32, mkexpr(taT), mkU32(4)),
18051                        mkexpr(oldRd1), condT );
18052         putIRegA( rD+1, mkexpr(newRd1), IRTemp_INVALID, Ijk_Boring );
18053         name = "ldrd";
18054      }
18055
18056      /* Update Rn if necessary. */
18057      switch (summary & 0x0F) {
18058         case 2: case 3:
18059            // should be assured by logic above:
18060            vassert(rN != 15); /* from checks above */
18061            if (bS == 0) {
18062               vassert(rD+0 != rN); /* since we just wrote rD+0 */
18063               vassert(rD+1 != rN); /* since we just wrote rD+1 */
18064            }
18065            if (!writeback_already_done)
18066               putIRegA( rN, mkexpr(eaT), condT, Ijk_Boring );
18067            break;
18068      }
18069
18070      switch (summary & 0x0F) {
18071         case 1:  DIP("%s%s r%u, %s\n", name, nCC(INSN_COND), rD, dis_buf);
18072                  break;
18073         case 2:  DIP("%s%s r%u, %s! (at-EA-then-Rn=EA)\n",
18074                      name, nCC(INSN_COND), rD, dis_buf);
18075                  break;
18076         case 3:  DIP("%s%s r%u, %s! (at-Rn-then-Rn=EA)\n",
18077                      name, nCC(INSN_COND), rD, dis_buf);
18078                  break;
18079         default: vassert(0);
18080      }
18081
18082      goto decode_success;
18083    }
18084
18085   after_load_store_doubleword:
18086
18087    /* ------------------- {s,u}xtab ------------- */
18088    if (BITS8(0,1,1,0,1,0,1,0) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
18089        && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
18090        && BITS4(0,1,1,1) == INSN(7,4)) {
18091       UInt rN  = INSN(19,16);
18092       UInt rD  = INSN(15,12);
18093       UInt rM  = INSN(3,0);
18094       UInt rot = (insn >> 10) & 3;
18095       UInt isU = INSN(22,22);
18096       if (rN == 15/*it's {S,U}XTB*/ || rD == 15 || rM == 15) {
18097          /* undecodable; fall through */
18098       } else {
18099          IRTemp srcL = newTemp(Ity_I32);
18100          IRTemp srcR = newTemp(Ity_I32);
18101          IRTemp res  = newTemp(Ity_I32);
18102          assign(srcR, getIRegA(rM));
18103          assign(srcL, getIRegA(rN));
18104          assign(res,  binop(Iop_Add32,
18105                             mkexpr(srcL),
18106                             unop(isU ? Iop_8Uto32 : Iop_8Sto32,
18107                                  unop(Iop_32to8,
18108                                       genROR32(srcR, 8 * rot)))));
18109          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18110          DIP("%cxtab%s r%u, r%u, r%u, ror #%u\n",
18111              isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
18112          goto decode_success;
18113       }
18114       /* fall through */
18115    }
18116
18117    /* ------------------- {s,u}xtah ------------- */
18118    if (BITS8(0,1,1,0,1,0,1,1) == (INSN(27,20) & BITS8(1,1,1,1,1,0,1,1))
18119        && BITS4(0,0,0,0) == (INSN(11,8) & BITS4(0,0,1,1))
18120        && BITS4(0,1,1,1) == INSN(7,4)) {
18121       UInt rN  = INSN(19,16);
18122       UInt rD  = INSN(15,12);
18123       UInt rM  = INSN(3,0);
18124       UInt rot = (insn >> 10) & 3;
18125       UInt isU = INSN(22,22);
18126       if (rN == 15/*it's {S,U}XTH*/ || rD == 15 || rM == 15) {
18127          /* undecodable; fall through */
18128       } else {
18129          IRTemp srcL = newTemp(Ity_I32);
18130          IRTemp srcR = newTemp(Ity_I32);
18131          IRTemp res  = newTemp(Ity_I32);
18132          assign(srcR, getIRegA(rM));
18133          assign(srcL, getIRegA(rN));
18134          assign(res,  binop(Iop_Add32,
18135                             mkexpr(srcL),
18136                             unop(isU ? Iop_16Uto32 : Iop_16Sto32,
18137                                  unop(Iop_32to16,
18138                                       genROR32(srcR, 8 * rot)))));
18139          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18140
18141          DIP("%cxtah%s r%u, r%u, r%u, ror #%u\n",
18142              isU ? 'u' : 's', nCC(INSN_COND), rD, rN, rM, rot);
18143          goto decode_success;
18144       }
18145       /* fall through */
18146    }
18147
18148    /* ------------------- rev16, rev ------------------ */
18149    if (INSN(27,16) == 0x6BF
18150        && (INSN(11,4) == 0xFB/*rev16*/ || INSN(11,4) == 0xF3/*rev*/)) {
18151       Bool isREV = INSN(11,4) == 0xF3;
18152       UInt rM    = INSN(3,0);
18153       UInt rD    = INSN(15,12);
18154       if (rM != 15 && rD != 15) {
18155          IRTemp rMt = newTemp(Ity_I32);
18156          assign(rMt, getIRegA(rM));
18157          IRTemp res = isREV ? gen_REV(rMt) : gen_REV16(rMt);
18158          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18159          DIP("rev%s%s r%u, r%u\n", isREV ? "" : "16",
18160              nCC(INSN_COND), rD, rM);
18161          goto decode_success;
18162       }
18163    }
18164
18165    /* ------------------- revsh ----------------------- */
18166    if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xFB) {
18167       UInt rM = INSN(3,0);
18168       UInt rD = INSN(15,12);
18169       if (rM != 15 && rD != 15) {
18170          IRTemp irt_rM  = newTemp(Ity_I32);
18171          IRTemp irt_hi  = newTemp(Ity_I32);
18172          IRTemp irt_low = newTemp(Ity_I32);
18173          IRTemp irt_res = newTemp(Ity_I32);
18174          assign(irt_rM, getIRegA(rM));
18175          assign(irt_hi,
18176                 binop(Iop_Sar32,
18177                       binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
18178                       mkU8(16)
18179                 )
18180          );
18181          assign(irt_low,
18182                 binop(Iop_And32,
18183                       binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
18184                       mkU32(0xFF)
18185                 )
18186          );
18187          assign(irt_res,
18188                 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
18189          );
18190          putIRegA(rD, mkexpr(irt_res), condT, Ijk_Boring);
18191          DIP("revsh%s r%u, r%u\n", nCC(INSN_COND), rD, rM);
18192          goto decode_success;
18193       }
18194    }
18195
18196    /* ------------------- rbit ------------------ */
18197    if (INSN(27,16) == 0x6FF && INSN(11,4) == 0xF3) {
18198       UInt rD = INSN(15,12);
18199       UInt rM = INSN(3,0);
18200       if (rD != 15 && rM != 15) {
18201          IRTemp arg = newTemp(Ity_I32);
18202          assign(arg, getIRegA(rM));
18203          IRTemp res = gen_BITREV(arg);
18204          putIRegA(rD, mkexpr(res), condT, Ijk_Boring);
18205          DIP("rbit r%u, r%u\n", rD, rM);
18206          goto decode_success;
18207       }
18208    }
18209
18210    /* ------------------- smmul ------------------ */
18211    if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
18212        && INSN(15,12) == BITS4(1,1,1,1)
18213        && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
18214       UInt bitR = INSN(5,5);
18215       UInt rD = INSN(19,16);
18216       UInt rM = INSN(11,8);
18217       UInt rN = INSN(3,0);
18218       if (rD != 15 && rM != 15 && rN != 15) {
18219          IRExpr* res
18220          = unop(Iop_64HIto32,
18221                 binop(Iop_Add64,
18222                       binop(Iop_MullS32, getIRegA(rN), getIRegA(rM)),
18223                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
18224          putIRegA(rD, res, condT, Ijk_Boring);
18225          DIP("smmul%s%s r%u, r%u, r%u\n",
18226              nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM);
18227          goto decode_success;
18228       }
18229    }
18230
18231    /* ------------------- smmla ------------------ */
18232    if (INSN(27,20) == BITS8(0,1,1,1,0,1,0,1)
18233        && INSN(15,12) != BITS4(1,1,1,1)
18234        && (INSN(7,4) & BITS4(1,1,0,1)) == BITS4(0,0,0,1)) {
18235       UInt bitR = INSN(5,5);
18236       UInt rD = INSN(19,16);
18237       UInt rA = INSN(15,12);
18238       UInt rM = INSN(11,8);
18239       UInt rN = INSN(3,0);
18240       if (rD != 15 && rM != 15 && rN != 15) {
18241          IRExpr* res
18242          = unop(Iop_64HIto32,
18243                 binop(Iop_Add64,
18244                       binop(Iop_Add64,
18245                             binop(Iop_32HLto64, getIRegA(rA), mkU32(0)),
18246                             binop(Iop_MullS32, getIRegA(rN), getIRegA(rM))),
18247                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
18248          putIRegA(rD, res, condT, Ijk_Boring);
18249          DIP("smmla%s%s r%u, r%u, r%u, r%u\n",
18250              nCC(INSN_COND), bitR ? "r" : "", rD, rN, rM, rA);
18251          goto decode_success;
18252       }
18253    }
18254
18255    /* -------------- (A1) LDRT reg+/-#imm12 -------------- */
18256    /* Load Register Unprivileged:
18257       ldrt<c> Rt, [Rn] {, #+/-imm12}
18258    */
18259    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,1) ) {
18260       UInt rT     = INSN(15,12);
18261       UInt rN     = INSN(19,16);
18262       UInt imm12  = INSN(11,0);
18263       UInt bU     = INSN(23,23);
18264       Bool valid  = True;
18265       if (rT == 15 || rN == 15 || rN == rT) valid = False;
18266       if (valid) {
18267          IRTemp newRt = newTemp(Ity_I32);
18268          loadGuardedLE( newRt,
18269                         ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
18270          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18271          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18272                              getIRegA(rN), mkU32(imm12));
18273          putIRegA(rN, erN, condT, Ijk_Boring);
18274          DIP("ldrt%s r%u, [r%u], #%c%u\n",
18275              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18276          goto decode_success;
18277       }
18278    }
18279
18280    /* -------------- (A2) LDRT reg+/-reg with shift -------------- */
18281    /* Load Register Unprivileged:
18282       ldrt<c> Rt, [Rn], +/-Rm{, shift}
18283    */
18284    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,1)
18285         && INSN(4,4) == 0 ) {
18286       UInt rT     = INSN(15,12);
18287       UInt rN     = INSN(19,16);
18288       UInt rM     = INSN(3,0);
18289       UInt imm5   = INSN(11,7);
18290       UInt bU     = INSN(23,23);
18291       UInt type   = INSN(6,5);
18292       Bool valid  = True;
18293       if (rT == 15 || rN == 15 || rN == rT || rM == 15
18294           /* || (ArchVersion() < 6 && rM == rN) */)
18295          valid = False;
18296       if (valid) {
18297          IRTemp newRt = newTemp(Ity_I32);
18298          loadGuardedLE( newRt,
18299                         ILGop_Ident32, getIRegA(rN), getIRegA(rT), condT );
18300          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18301          // dis_buf generated is slightly bogus, in fact.
18302          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18303                                                        type, imm5, dis_buf);
18304          putIRegA(rN, erN, condT, Ijk_Boring);
18305          DIP("ldrt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18306          goto decode_success;
18307       }
18308    }
18309
18310    /* -------------- (A1) LDRBT reg+/-#imm12 -------------- */
18311    /* Load Register Byte Unprivileged:
18312       ldrbt<c> Rt, [Rn], #+/-imm12
18313    */
18314    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,1) ) {
18315       UInt rT     = INSN(15,12);
18316       UInt rN     = INSN(19,16);
18317       UInt imm12  = INSN(11,0);
18318       UInt bU     = INSN(23,23);
18319       Bool valid  = True;
18320       if (rT == 15 || rN == 15 || rN == rT) valid = False;
18321       if (valid) {
18322          IRTemp newRt = newTemp(Ity_I32);
18323          loadGuardedLE( newRt,
18324                         ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
18325          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18326          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18327                              getIRegA(rN), mkU32(imm12));
18328          putIRegA(rN, erN, condT, Ijk_Boring);
18329          DIP("ldrbt%s r%u, [r%u], #%c%u\n",
18330              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18331          goto decode_success;
18332       }
18333    }
18334
18335    /* -------------- (A2) LDRBT reg+/-reg with shift -------------- */
18336    /* Load Register Byte Unprivileged:
18337       ldrbt<c> Rt, [Rn], +/-Rm{, shift}
18338    */
18339    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,1)
18340         && INSN(4,4) == 0 ) {
18341       UInt rT     = INSN(15,12);
18342       UInt rN     = INSN(19,16);
18343       UInt rM     = INSN(3,0);
18344       UInt imm5   = INSN(11,7);
18345       UInt bU     = INSN(23,23);
18346       UInt type   = INSN(6,5);
18347       Bool valid  = True;
18348       if (rT == 15 || rN == 15 || rN == rT || rM == 15
18349           /* || (ArchVersion() < 6 && rM == rN) */)
18350          valid = False;
18351       if (valid) {
18352          IRTemp newRt = newTemp(Ity_I32);
18353          loadGuardedLE( newRt,
18354                         ILGop_8Uto32, getIRegA(rN), getIRegA(rT), condT );
18355          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18356          // dis_buf generated is slightly bogus, in fact.
18357          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18358                                                        type, imm5, dis_buf);
18359          putIRegA(rN, erN, condT, Ijk_Boring);
18360          DIP("ldrbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18361          goto decode_success;
18362       }
18363    }
18364
18365    /* -------------- (A1) LDRHT reg+#imm8 -------------- */
18366    /* Load Register Halfword Unprivileged:
18367       ldrht<c> Rt, [Rn] {, #+/-imm8}
18368    */
18369    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18370        && INSN(7,4) == BITS4(1,0,1,1) ) {
18371       UInt rT    = INSN(15,12);
18372       UInt rN    = INSN(19,16);
18373       UInt bU    = INSN(23,23);
18374       UInt imm4H = INSN(11,8);
18375       UInt imm4L = INSN(3,0);
18376       UInt imm8  = (imm4H << 4) | imm4L;
18377       Bool valid = True;
18378       if (rT == 15 || rN == 15 || rN == rT)
18379          valid = False;
18380       if (valid) {
18381          IRTemp newRt = newTemp(Ity_I32);
18382          loadGuardedLE( newRt,
18383                         ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
18384          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18385          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18386                              getIRegA(rN), mkU32(imm8));
18387          putIRegA(rN, erN, condT, Ijk_Boring);
18388          DIP("ldrht%s r%u, [r%u], #%c%u\n",
18389              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18390          goto decode_success;
18391       }
18392    }
18393
18394    /* -------------- (A2) LDRHT reg+/-reg -------------- */
18395    /* Load Register Halfword Unprivileged:
18396       ldrht<c> Rt, [Rn], +/-Rm
18397    */
18398    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18399        && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
18400       UInt rT    = INSN(15,12);
18401       UInt rN    = INSN(19,16);
18402       UInt rM    = INSN(3,0);
18403       UInt bU    = INSN(23,23);
18404       Bool valid = True;
18405       if (rT == 15 || rN == 15 || rN == rT || rM == 15)
18406          valid = False;
18407       if (valid) {
18408          IRTemp newRt = newTemp(Ity_I32);
18409          loadGuardedLE( newRt,
18410                         ILGop_16Uto32, getIRegA(rN), getIRegA(rT), condT );
18411          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18412          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18413                              getIRegA(rN), getIRegA(rM));
18414          putIRegA(rN, erN, condT, Ijk_Boring);
18415          DIP("ldrht%s r%u, [r%u], %cr%u\n",
18416              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18417          goto decode_success;
18418       }
18419    }
18420
18421    /* -------------- (A1) LDRSHT reg+#imm8 -------------- */
18422    /* Load Register Signed Halfword Unprivileged:
18423       ldrsht<c> Rt, [Rn] {, #+/-imm8}
18424    */
18425    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18426        && INSN(7,4) == BITS4(1,1,1,1)) {
18427       UInt rT    = INSN(15,12);
18428       UInt rN    = INSN(19,16);
18429       UInt bU    = INSN(23,23);
18430       UInt imm4H = INSN(11,8);
18431       UInt imm4L = INSN(3,0);
18432       UInt imm8  = (imm4H << 4) | imm4L;
18433       Bool valid = True;
18434       if (rN == 15 || rT == 15 || rN == rT)
18435          valid = False;
18436       if (valid) {
18437          IRTemp newRt = newTemp(Ity_I32);
18438          loadGuardedLE( newRt,
18439                         ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
18440          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18441          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18442                              getIRegA(rN), mkU32(imm8));
18443          putIRegA(rN, erN, condT, Ijk_Boring);
18444          DIP("ldrsht%s r%u, [r%u], #%c%u\n",
18445              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18446          goto decode_success;
18447       }
18448    }
18449
18450    /* -------------- (A2) LDRSHT reg+/-reg -------------- */
18451    /* Load Register Signed Halfword Unprivileged:
18452       ldrsht<c> Rt, [Rn], +/-Rm
18453    */
18454    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18455        && INSN(11,4) == BITS8(0,0,0,0,1,1,1,1)) {
18456       UInt rT    = INSN(15,12);
18457       UInt rN    = INSN(19,16);
18458       UInt rM    = INSN(3,0);
18459       UInt bU    = INSN(23,23);
18460       Bool valid = True;
18461       if (rN == 15 || rT == 15 || rN == rT || rM == 15)
18462          valid = False;
18463       if (valid) {
18464          IRTemp newRt = newTemp(Ity_I32);
18465          loadGuardedLE( newRt,
18466                         ILGop_16Sto32, getIRegA(rN), getIRegA(rT), condT );
18467          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18468          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18469                              getIRegA(rN), getIRegA(rM));
18470          putIRegA(rN, erN, condT, Ijk_Boring);
18471          DIP("ldrsht%s r%u, [r%u], %cr%u\n",
18472              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18473          goto decode_success;
18474       }
18475    }
18476
18477    /* -------------- (A1) LDRSBT reg+#imm8 -------------- */
18478    /* Load Register Signed Byte Unprivileged:
18479       ldrsbt<c> Rt, [Rn] {, #+/-imm8}
18480    */
18481    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,1)
18482        && INSN(7,4) == BITS4(1,1,0,1)) {
18483       UInt rT    = INSN(15,12);
18484       UInt rN    = INSN(19,16);
18485       UInt bU    = INSN(23,23);
18486       UInt imm4H = INSN(11,8);
18487       UInt imm4L = INSN(3,0);
18488       UInt imm8  = (imm4H << 4) | imm4L;
18489       Bool valid = True;
18490       if (rT == 15 || rN == 15 || rN == rT)
18491          valid = False;
18492       if (valid) {
18493          IRTemp newRt = newTemp(Ity_I32);
18494          loadGuardedLE( newRt,
18495                         ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
18496          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18497          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18498                              getIRegA(rN), mkU32(imm8));
18499          putIRegA(rN, erN, condT, Ijk_Boring);
18500          DIP("ldrsbt%s r%u, [r%u], #%c%u\n",
18501              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18502          goto decode_success;
18503       }
18504    }
18505
18506    /* -------------- (A2) LDRSBT reg+/-reg -------------- */
18507    /* Load Register Signed Byte Unprivileged:
18508       ldrsbt<c> Rt, [Rn], +/-Rm
18509    */
18510    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,1)
18511        && INSN(11,4) == BITS8(0,0,0,0,1,1,0,1)) {
18512       UInt rT    = INSN(15,12);
18513       UInt rN    = INSN(19,16);
18514       UInt bU    = INSN(23,23);
18515       UInt rM    = INSN(3,0);
18516       Bool valid = True;
18517       if (rT == 15 || rN == 15 || rN == rT || rM == 15)
18518          valid = False;
18519       if (valid) {
18520          IRTemp newRt = newTemp(Ity_I32);
18521          loadGuardedLE( newRt,
18522                         ILGop_8Sto32, getIRegA(rN), getIRegA(rT), condT );
18523          putIRegA(rT, mkexpr(newRt), IRTemp_INVALID, Ijk_Boring);
18524          IRExpr* erN = binop(bU ? Iop_Add32 : Iop_Sub32,
18525                              getIRegA(rN), getIRegA(rM));
18526          putIRegA(rN, erN, condT, Ijk_Boring);
18527          DIP("ldrsbt%s r%u, [r%u], %cr%u\n",
18528              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18529          goto decode_success;
18530       }
18531    }
18532
18533    /* -------------- (A1) STRBT reg+#imm12 -------------- */
18534    /* Store Register Byte Unprivileged:
18535       strbt<c> Rt, [Rn], #+/-imm12
18536    */
18537    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,1,1,0) ) {
18538       UInt rT     = INSN(15,12);
18539       UInt rN     = INSN(19,16);
18540       UInt imm12  = INSN(11,0);
18541       UInt bU     = INSN(23,23);
18542       Bool valid = True;
18543       if (rT == 15 || rN == 15 || rN == rT) valid = False;
18544       if (valid) {
18545          IRExpr* address = getIRegA(rN);
18546          IRExpr* data = unop(Iop_32to8, getIRegA(rT));
18547          storeGuardedLE( address, data, condT);
18548          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18549                                getIRegA(rN), mkU32(imm12));
18550          putIRegA(rN, newRn, condT, Ijk_Boring);
18551          DIP("strbt%s r%u, [r%u], #%c%u\n",
18552              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18553          goto decode_success;
18554       }
18555    }
18556
18557    /* -------------- (A2) STRBT reg+/-reg -------------- */
18558    /* Store Register Byte Unprivileged:
18559       strbt<c> Rt, [Rn], +/-Rm{, shift}
18560    */
18561    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,1,1,0)
18562        && INSN(4,4) == 0) {
18563       UInt rT     = INSN(15,12);
18564       UInt rN     = INSN(19,16);
18565       UInt imm5   = INSN(11,7);
18566       UInt type   = INSN(6,5);
18567       UInt rM     = INSN(3,0);
18568       UInt bU     = INSN(23,23);
18569       Bool valid  = True;
18570       if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
18571       if (valid) {
18572          IRExpr* address = getIRegA(rN);
18573          IRExpr* data = unop(Iop_32to8, getIRegA(rT));
18574          storeGuardedLE( address, data, condT);
18575          // dis_buf generated is slightly bogus, in fact.
18576          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18577                                                        type, imm5, dis_buf);
18578          putIRegA(rN, erN, condT, Ijk_Boring);
18579          DIP("strbt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18580          goto decode_success;
18581       }
18582    }
18583
18584    /* -------------- (A1) STRHT reg+#imm8 -------------- */
18585    /* Store Register Halfword Unprivileged:
18586       strht<c> Rt, [Rn], #+/-imm8
18587    */
18588    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,1,1,0)
18589        && INSN(7,4) == BITS4(1,0,1,1) ) {
18590       UInt rT    = INSN(15,12);
18591       UInt rN    = INSN(19,16);
18592       UInt imm4H = INSN(11,8);
18593       UInt imm4L = INSN(3,0);
18594       UInt imm8  = (imm4H << 4) | imm4L;
18595       UInt bU    = INSN(23,23);
18596       Bool valid = True;
18597       if (rT == 15 || rN == 15 || rN == rT) valid = False;
18598       if (valid) {
18599          IRExpr* address = getIRegA(rN);
18600          IRExpr* data = unop(Iop_32to16, getIRegA(rT));
18601          storeGuardedLE( address, data, condT);
18602          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18603                                getIRegA(rN), mkU32(imm8));
18604          putIRegA(rN, newRn, condT, Ijk_Boring);
18605          DIP("strht%s r%u, [r%u], #%c%u\n",
18606              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm8);
18607          goto decode_success;
18608       }
18609    }
18610
18611    /* -------------- (A2) STRHT reg+reg -------------- */
18612    /* Store Register Halfword Unprivileged:
18613       strht<c> Rt, [Rn], +/-Rm
18614    */
18615    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,0,0,0,0,0,1,0)
18616        && INSN(11,4) == BITS8(0,0,0,0,1,0,1,1) ) {
18617       UInt rT    = INSN(15,12);
18618       UInt rN    = INSN(19,16);
18619       UInt rM    = INSN(3,0);
18620       UInt bU    = INSN(23,23);
18621       Bool valid = True;
18622       if (rT == 15 || rN == 15 || rN == rT || rM == 15) valid = False;
18623       if (valid) {
18624          IRExpr* address = getIRegA(rN);
18625          IRExpr* data = unop(Iop_32to16, getIRegA(rT));
18626          storeGuardedLE( address, data, condT);
18627          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18628                                getIRegA(rN), getIRegA(rM));
18629          putIRegA(rN, newRn, condT, Ijk_Boring);
18630          DIP("strht%s r%u, [r%u], %cr%u\n",
18631              nCC(INSN_COND), rT, rN, bU ? '+' : '-', rM);
18632          goto decode_success;
18633       }
18634    }
18635
18636    /* -------------- (A1) STRT reg+imm12 -------------- */
18637    /* Store Register Unprivileged:
18638       strt<c> Rt, [Rn], #+/-imm12
18639    */
18640    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,0,0,0,0,1,0) ) {
18641       UInt rT    = INSN(15,12);
18642       UInt rN    = INSN(19,16);
18643       UInt imm12 = INSN(11,0);
18644       UInt bU    = INSN(23,23);
18645       Bool valid = True;
18646       if (rN == 15 || rN == rT) valid = False;
18647       if (valid) {
18648          IRExpr* address = getIRegA(rN);
18649          storeGuardedLE( address, getIRegA(rT), condT);
18650          IRExpr* newRn = binop(bU ? Iop_Add32 : Iop_Sub32,
18651                                getIRegA(rN), mkU32(imm12));
18652          putIRegA(rN, newRn, condT, Ijk_Boring);
18653          DIP("strt%s r%u, [r%u], %c%u\n",
18654              nCC(INSN_COND), rT, rN, bU ? '+' : '-', imm12);
18655          goto decode_success;
18656       }
18657    }
18658
18659    /* -------------- (A2) STRT reg+reg -------------- */
18660    /* Store Register Unprivileged:
18661       strt<c> Rt, [Rn], +/-Rm{, shift}
18662    */
18663    if ( (INSN(27,20) & BITS8(1,1,1,1,0,1,1,1)) == BITS8(0,1,1,0,0,0,1,0)
18664        && INSN(4,4) == 0 ) {
18665       UInt rT    = INSN(15,12);
18666       UInt rN    = INSN(19,16);
18667       UInt rM    = INSN(3,0);
18668       UInt type  = INSN(6,5);
18669       UInt imm5  = INSN(11,7);
18670       UInt bU    = INSN(23,23);
18671       Bool valid = True;
18672       if (rN == 15 || rN == rT || rM == 15) valid = False;
18673       /* FIXME We didn't do:
18674          if ArchVersion() < 6 && rM == rN then UNPREDICTABLE */
18675       if (valid) {
18676          storeGuardedLE( getIRegA(rN), getIRegA(rT), condT);
18677          // dis_buf generated is slightly bogus, in fact.
18678          IRExpr* erN = mk_EA_reg_plusminus_shifted_reg(rN, bU, rM,
18679                                                        type, imm5, dis_buf);
18680          putIRegA(rN, erN, condT, Ijk_Boring);
18681          DIP("strt%s r%u, %s\n", nCC(INSN_COND), rT, dis_buf);
18682          goto decode_success;
18683       }
18684    }
18685
18686    /* ----------------------------------------------------------- */
18687    /* -- ARMv7 instructions                                    -- */
18688    /* ----------------------------------------------------------- */
18689
18690    /* -------------- read CP15 TPIDRURO register ------------- */
18691    /* mrc     p15, 0, r0,  c13, c0, 3  up to
18692       mrc     p15, 0, r14, c13, c0, 3
18693    */
18694    /* I don't know whether this is really v7-only.  But anyway, we
18695       have to support it since arm-linux uses TPIDRURO as a thread
18696       state register. */
18697    if (0x0E1D0F70 == (insn & 0x0FFF0FFF)) {
18698       UInt rD = INSN(15,12);
18699       if (rD <= 14) {
18700          /* skip r15, that's too stupid to handle */
18701          putIRegA(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32),
18702                       condT, Ijk_Boring);
18703          DIP("mrc%s p15,0, r%u, c13, c0, 3\n", nCC(INSN_COND), rD);
18704          goto decode_success;
18705       }
18706       /* fall through */
18707    }
18708
18709    /* ------------ read/write CP15 TPIDRURW register ----------- */
18710    /* mcr     p15, 0, r0,  c13, c0, 2 (r->cr xfer)  up to
18711       mcr     p15, 0, r14, c13, c0, 2
18712
18713       mrc     p15, 0, r0,  c13, c0, 2 (rc->r xfer)  up to
18714       mrc     p15, 0, r14, c13, c0, 2
18715    */
18716    if (0x0E0D0F50 == (insn & 0x0FFF0FFF)) { // MCR
18717       UInt rS = INSN(15,12);
18718       if (rS <= 14) {
18719          /* skip r15, that's too stupid to handle */
18720          putMiscReg32(OFFB_TPIDRURW, getIRegA(rS), condT);
18721          DIP("mcr%s p15,0, r%u, c13, c0, 2\n", nCC(INSN_COND), rS);
18722          goto decode_success;
18723       }
18724       /* fall through */
18725    }
18726    if (0x0E1D0F50 == (insn & 0x0FFF0FFF)) { // MRC
18727       UInt rD = INSN(15,12);
18728       if (rD <= 14) {
18729          /* skip r15, that's too stupid to handle */
18730          putIRegA(rD, IRExpr_Get(OFFB_TPIDRURW, Ity_I32),
18731                       condT, Ijk_Boring);
18732          DIP("mrc%s p15,0, r%u, c13, c0, 2\n", nCC(INSN_COND), rD);
18733          goto decode_success;
18734       }
18735       /* fall through */
18736    }
18737
18738    /* -------------- read CP15 PMUSRENR register ------------- */
18739    /* mrc     p15, 0, r0,  c9, c14, 0  up to
18740       mrc     p15, 0, r14, c9, c14, 0
18741    */
18742    /* A program reading this register is really asking "which
18743       performance monitoring registes are available in user space?
18744       The simple answer here is to return zero, meaning "none".  See
18745       #345984. */
18746    if (0x0E190F1E == (insn & 0x0FFF0FFF)) {
18747       UInt rD = INSN(15,12);
18748       if (rD <= 14) {
18749          /* skip r15, that's too stupid to handle */
18750          putIRegA(rD, mkU32(0), condT, Ijk_Boring);
18751          DIP("mrc%s p15,0, r%u, c9, c14, 0\n", nCC(INSN_COND), rD);
18752          goto decode_success;
18753       }
18754       /* fall through */
18755    }
18756
18757    /* Handle various kinds of barriers.  This is rather indiscriminate
18758       in the sense that they are all turned into an IR Fence, which
18759       means we don't know which they are, so the back end has to
18760       re-emit them all when it comes acrosss an IR Fence.
18761    */
18762    /* v6 */ /* mcr 15, 0, rT, c7, c10, 5 */
18763    if (0xEE070FBA == (insn & 0xFFFF0FFF)) {
18764       UInt rT = INSN(15,12);
18765       if (rT <= 14) {
18766          /* mcr 15, 0, rT, c7, c10, 5 (v6) equiv to DMB (v7).  Data
18767             Memory Barrier -- ensures ordering of memory accesses. */
18768          stmt( IRStmt_MBE(Imbe_Fence) );
18769          DIP("mcr 15, 0, r%u, c7, c10, 5 (data memory barrier)\n", rT);
18770          goto decode_success;
18771       }
18772       /* fall through */
18773    }
18774    /* other flavours of barrier */
18775    switch (insn) {
18776       case 0xEE070F9A: /* v6 */
18777          /* mcr 15, 0, r0, c7, c10, 4 (v6) equiv to DSB (v7).  Data
18778             Synch Barrier -- ensures completion of memory accesses. */
18779          stmt( IRStmt_MBE(Imbe_Fence) );
18780          DIP("mcr 15, 0, r0, c7, c10, 4 (data synch barrier)\n");
18781          goto decode_success;
18782       case 0xEE070F95: /* v6 */
18783          /* mcr 15, 0, r0, c7, c5, 4 (v6) equiv to ISB (v7).
18784             Instruction Synchronisation Barrier (or Flush Prefetch
18785             Buffer) -- a pipe flush, I think.  I suspect we could
18786             ignore those, but to be on the safe side emit a fence
18787             anyway. */
18788          stmt( IRStmt_MBE(Imbe_Fence) );
18789          DIP("mcr 15, 0, r0, c7, c5, 4 (insn synch barrier)\n");
18790          goto decode_success;
18791       default:
18792          break;
18793    }
18794
18795    /* ----------------------------------------------------------- */
18796    /* -- Hints                                                 -- */
18797    /* ----------------------------------------------------------- */
18798
18799    switch (insn & 0x0FFFFFFF) {
18800       /* ------------------- NOP ------------------ */
18801       case 0x0320F000:
18802          DIP("nop%s\n", nCC(INSN_COND));
18803          goto decode_success;
18804       /* ------------------- YIELD ------------------ */
18805       case 0x0320F001:
18806          /* Continue after conditionally yielding. */
18807          DIP("yield%s\n", nCC(INSN_COND));
18808          stmt( IRStmt_Exit( unop(Iop_32to1,
18809                                  condT == IRTemp_INVALID
18810                                     ? mkU32(1) : mkexpr(condT)),
18811                             Ijk_Yield,
18812                             IRConst_U32(guest_R15_curr_instr_notENC + 4),
18813                             OFFB_R15T ));
18814          goto decode_success;
18815       default:
18816          break;
18817    }
18818
18819    /* ----------------------------------------------------------- */
18820    /* -- VFP (CP 10, CP 11) instructions (in ARM mode)         -- */
18821    /* ----------------------------------------------------------- */
18822
18823    if (INSN_COND != ARMCondNV) {
18824       Bool ok_vfp = decode_CP10_CP11_instruction (
18825                        &dres, INSN(27,0), condT, INSN_COND,
18826                        False/*!isT*/
18827                     );
18828       if (ok_vfp)
18829          goto decode_success;
18830    }
18831
18832    /* ----------------------------------------------------------- */
18833    /* -- NEON instructions (in ARM mode)                       -- */
18834    /* ----------------------------------------------------------- */
18835
18836    /* These are all in NV space, and so are taken care of (far) above,
18837       by a call from this function to
18838       decode_NV_instruction_ARMv7_and_below(). */
18839
18840    /* ----------------------------------------------------------- */
18841    /* -- v6 media instructions (in ARM mode)                   -- */
18842    /* ----------------------------------------------------------- */
18843
18844    { Bool ok_v6m = decode_V6MEDIA_instruction(
18845                        &dres, INSN(27,0), condT, INSN_COND,
18846                        False/*!isT*/
18847                    );
18848      if (ok_v6m)
18849         goto decode_success;
18850    }
18851
18852    /* ----------------------------------------------------------- */
18853    /* -- v8 instructions (in ARM mode)                         -- */
18854    /* ----------------------------------------------------------- */
18855
18856   after_v7_decoder:
18857
18858    /* If we get here, it means that all attempts to decode the
18859       instruction as ARMv7 or earlier have failed.  So, if we're doing
18860       ARMv8 or later, here is the point to try for it. */
18861
18862    if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
18863       Bool ok_v8
18864          = decode_V8_instruction( &dres, insn, condT, False/*!isT*/,
18865                                   IRTemp_INVALID, IRTemp_INVALID );
18866       if (ok_v8)
18867          goto decode_success;
18868    }
18869
18870    /* ----------------------------------------------------------- */
18871    /* -- Undecodable                                           -- */
18872    /* ----------------------------------------------------------- */
18873
18874    goto decode_failure;
18875    /*NOTREACHED*/
18876
18877   decode_failure:
18878    /* All decode failures end up here. */
18879    if (sigill_diag) {
18880       vex_printf("disInstr(arm): unhandled instruction: "
18881                  "0x%x\n", insn);
18882       vex_printf("                 cond=%d(0x%x) 27:20=%d(0x%02x) "
18883                                    "4:4=%d "
18884                                    "3:0=%d(0x%x)\n",
18885                  (Int)INSN_COND, (UInt)INSN_COND,
18886                  (Int)INSN(27,20), (UInt)INSN(27,20),
18887                  (Int)INSN(4,4),
18888                  (Int)INSN(3,0), (UInt)INSN(3,0) );
18889    }
18890
18891    /* Tell the dispatcher that this insn cannot be decoded, and so has
18892       not been executed, and (is currently) the next to be executed.
18893       R15 should be up-to-date since it made so at the start of each
18894       insn, but nevertheless be paranoid and update it again right
18895       now. */
18896    vassert(0 == (guest_R15_curr_instr_notENC & 3));
18897    llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
18898    dres.len         = 0;
18899    dres.whatNext    = Dis_StopHere;
18900    dres.jk_StopHere = Ijk_NoDecode;
18901    dres.continueAt  = 0;
18902    return dres;
18903
18904   decode_success:
18905    /* All decode successes end up here. */
18906    DIP("\n");
18907
18908    vassert(dres.len == 4 || dres.len == 20);
18909
18910    /* Now then.  Do we have an implicit jump to r15 to deal with? */
18911    if (r15written) {
18912       /* If we get jump to deal with, we assume that there's been no
18913          other competing branch stuff previously generated for this
18914          insn.  That's reasonable, in the sense that the ARM insn set
18915          appears to declare as "Unpredictable" any instruction which
18916          generates more than one possible new value for r15.  Hence
18917          just assert.  The decoders themselves should check against
18918          all such instructions which are thusly Unpredictable, and
18919          decline to decode them.  Hence we should never get here if we
18920          have competing new values for r15, and hence it is safe to
18921          assert here. */
18922       vassert(dres.whatNext == Dis_Continue);
18923       vassert(irsb->next == NULL);
18924       vassert(irsb->jumpkind == Ijk_Boring);
18925       /* If r15 is unconditionally written, terminate the block by
18926          jumping to it.  If it's conditionally written, still
18927          terminate the block (a shame, but we can't do side exits to
18928          arbitrary destinations), but first jump to the next
18929          instruction if the condition doesn't hold. */
18930       /* We can't use getIReg(15) to get the destination, since that
18931          will produce r15+8, which isn't what we want.  Must use
18932          llGetIReg(15) instead. */
18933       if (r15guard == IRTemp_INVALID) {
18934          /* unconditional */
18935       } else {
18936          /* conditional */
18937          stmt( IRStmt_Exit(
18938                   unop(Iop_32to1,
18939                        binop(Iop_Xor32,
18940                              mkexpr(r15guard), mkU32(1))),
18941                   r15kind,
18942                   IRConst_U32(guest_R15_curr_instr_notENC + 4),
18943                   OFFB_R15T
18944          ));
18945       }
18946       /* This seems crazy, but we're required to finish the insn with
18947          a write to the guest PC.  As usual we rely on ir_opt to tidy
18948          up later. */
18949       llPutIReg(15, llGetIReg(15));
18950       dres.whatNext    = Dis_StopHere;
18951       dres.jk_StopHere = r15kind;
18952    } else {
18953       /* Set up the end-state in the normal way. */
18954       switch (dres.whatNext) {
18955          case Dis_Continue:
18956             llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
18957             break;
18958          case Dis_ResteerU:
18959          case Dis_ResteerC:
18960             llPutIReg(15, mkU32(dres.continueAt));
18961             break;
18962          case Dis_StopHere:
18963             break;
18964          default:
18965             vassert(0);
18966       }
18967    }
18968
18969    return dres;
18970
18971 #  undef INSN_COND
18972 #  undef INSN
18973 }
18974
18975
18976 /*------------------------------------------------------------*/
18977 /*--- Disassemble a single Thumb2 instruction              ---*/
18978 /*------------------------------------------------------------*/
18979
18980 static const UChar it_length_table[256]; /* fwds */
18981
18982 /* NB: in Thumb mode we do fetches of regs with getIRegT, which
18983    automagically adds 4 to fetches of r15.  However, writes to regs
18984    are done with putIRegT, which disallows writes to r15.  Hence any
18985    r15 writes and associated jumps have to be done "by hand". */
18986
18987 /* Disassemble a single Thumb instruction into IR.  The instruction is
18988    located in host memory at guest_instr, and has (decoded) guest IP
18989    of guest_R15_curr_instr_notENC, which will have been set before the
18990    call here. */
18991
18992 static
18993 DisResult disInstr_THUMB_WRK (
18994              Bool         (*resteerOkFn) ( /*opaque*/void*, Addr ),
18995              Bool         resteerCisOk,
18996              void*        callback_opaque,
18997              const UChar* guest_instr,
18998              const VexArchInfo* archinfo,
18999              const VexAbiInfo*  abiinfo,
19000              Bool         sigill_diag
19001           )
19002 {
19003    /* A macro to fish bits out of insn0.  There's also INSN1, to fish
19004       bits out of insn1, but that's defined only after the end of the
19005       16-bit insn decoder, so as to stop it mistakenly being used
19006       therein. */
19007 #  define INSN0(_bMax,_bMin)  SLICE_UInt(((UInt)insn0), (_bMax), (_bMin))
19008
19009    DisResult dres;
19010    UShort    insn0; /*  first 16 bits of the insn */
19011    UShort    insn1; /* second 16 bits of the insn */
19012    HChar     dis_buf[128];  // big enough to hold LDMIA etc text
19013
19014    /* Summary result of the ITxxx backwards analysis: False == safe
19015       but suboptimal. */
19016    Bool guaranteedUnconditional = False;
19017
19018    /* Set result defaults. */
19019    dres.whatNext    = Dis_Continue;
19020    dres.len         = 2;
19021    dres.continueAt  = 0;
19022    dres.jk_StopHere = Ijk_INVALID;
19023    dres.hint        = Dis_HintNone;
19024
19025    /* Set default actions for post-insn handling of writes to r15, if
19026       required. */
19027    r15written = False;
19028    r15guard   = IRTemp_INVALID; /* unconditional */
19029    r15kind    = Ijk_Boring;
19030
19031    /* Insns could be 2 or 4 bytes long.  Just get the first 16 bits at
19032       this point.  If we need the second 16, get them later.  We can't
19033       get them both out immediately because it risks a fault (very
19034       unlikely, but ..) if the second 16 bits aren't actually
19035       necessary. */
19036    insn0 = getUShortLittleEndianly( guest_instr );
19037    insn1 = 0; /* We'll get it later, once we know we need it. */
19038
19039    /* Similarly, will set this later. */
19040    IRTemp old_itstate = IRTemp_INVALID;
19041
19042    if (0) vex_printf("insn: 0x%x\n", insn0);
19043
19044    DIP("\t(thumb) 0x%x:  ", (UInt)guest_R15_curr_instr_notENC);
19045
19046    vassert(0 == (guest_R15_curr_instr_notENC & 1));
19047
19048    /* ----------------------------------------------------------- */
19049    /* Spot "Special" instructions (see comment at top of file). */
19050    {
19051       const UChar* code = guest_instr;
19052       /* Spot the 16-byte preamble:
19053
19054          ea4f 0cfc  mov.w   ip, ip, ror #3
19055          ea4f 3c7c  mov.w   ip, ip, ror #13
19056          ea4f 7c7c  mov.w   ip, ip, ror #29
19057          ea4f 4cfc  mov.w   ip, ip, ror #19
19058       */
19059       UInt word1 = 0x0CFCEA4F;
19060       UInt word2 = 0x3C7CEA4F;
19061       UInt word3 = 0x7C7CEA4F;
19062       UInt word4 = 0x4CFCEA4F;
19063       if (getUIntLittleEndianly(code+ 0) == word1 &&
19064           getUIntLittleEndianly(code+ 4) == word2 &&
19065           getUIntLittleEndianly(code+ 8) == word3 &&
19066           getUIntLittleEndianly(code+12) == word4) {
19067          /* Got a "Special" instruction preamble.  Which one is it? */
19068          // 0x 0A 0A EA 4A
19069          if (getUIntLittleEndianly(code+16) == 0x0A0AEA4A
19070                                                /* orr.w r10,r10,r10 */) {
19071             /* R3 = client_request ( R4 ) */
19072             DIP("r3 = client_request ( %%r4 )\n");
19073             llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19074             dres.jk_StopHere = Ijk_ClientReq;
19075             dres.whatNext    = Dis_StopHere;
19076             goto decode_success;
19077          }
19078          else
19079          // 0x 0B 0B EA 4B
19080          if (getUIntLittleEndianly(code+16) == 0x0B0BEA4B
19081                                                /* orr r11,r11,r11 */) {
19082             /* R3 = guest_NRADDR */
19083             DIP("r3 = guest_NRADDR\n");
19084             dres.len = 20;
19085             llPutIReg(3, IRExpr_Get( OFFB_NRADDR, Ity_I32 ));
19086             goto decode_success;
19087          }
19088          else
19089          // 0x 0C 0C EA 4C
19090          if (getUIntLittleEndianly(code+16) == 0x0C0CEA4C
19091                                                /* orr r12,r12,r12 */) {
19092             /*  branch-and-link-to-noredir R4 */
19093             DIP("branch-and-link-to-noredir r4\n");
19094             llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19095             llPutIReg(15, getIRegT(4));
19096             dres.jk_StopHere = Ijk_NoRedir;
19097             dres.whatNext    = Dis_StopHere;
19098             goto decode_success;
19099          }
19100          else
19101          // 0x 09 09 EA 49
19102          if (getUIntLittleEndianly(code+16) == 0x0909EA49
19103                                                /* orr r9,r9,r9 */) {
19104             /* IR injection */
19105             DIP("IR injection\n");
19106             vex_inject_ir(irsb, Iend_LE);
19107             // Invalidate the current insn. The reason is that the IRop we're
19108             // injecting here can change. In which case the translation has to
19109             // be redone. For ease of handling, we simply invalidate all the
19110             // time.
19111             stmt(IRStmt_Put(OFFB_CMSTART, mkU32(guest_R15_curr_instr_notENC)));
19112             stmt(IRStmt_Put(OFFB_CMLEN,   mkU32(20)));
19113             llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
19114             dres.whatNext    = Dis_StopHere;
19115             dres.jk_StopHere = Ijk_InvalICache;
19116             goto decode_success;
19117          }
19118          /* We don't know what it is.  Set insn0 so decode_failure
19119             can print the insn following the Special-insn preamble. */
19120          insn0 = getUShortLittleEndianly(code+16);
19121          goto decode_failure;
19122          /*NOTREACHED*/
19123       }
19124
19125    }
19126
19127    /* ----------------------------------------------------------- */
19128
19129    /* Main Thumb instruction decoder starts here.  It's a series of
19130       switches which examine ever longer bit sequences at the MSB of
19131       the instruction word, first for 16-bit insns, then for 32-bit
19132       insns. */
19133
19134    /* --- BEGIN ITxxx optimisation analysis --- */
19135    /* This is a crucial optimisation for the ITState boilerplate that
19136       follows.  Examine the 9 halfwords preceding this instruction,
19137       and if we are absolutely sure that none of them constitute an
19138       'it' instruction, then we can be sure that this instruction is
19139       not under the control of any 'it' instruction, and so
19140       guest_ITSTATE must be zero.  So write zero into ITSTATE right
19141       now, so that iropt can fold out almost all of the resulting
19142       junk.
19143
19144       If we aren't sure, we can always safely skip this step.  So be a
19145       bit conservative about it: only poke around in the same page as
19146       this instruction, lest we get a fault from the previous page
19147       that would not otherwise have happened.  The saving grace is
19148       that such skipping is pretty rare -- it only happens,
19149       statistically, 18/4096ths of the time, so is judged unlikely to
19150       be a performance problems.
19151
19152       FIXME: do better.  Take into account the number of insns covered
19153       by any IT insns we find, to rule out cases where an IT clearly
19154       cannot cover this instruction.  This would improve behaviour for
19155       branch targets immediately following an IT-guarded group that is
19156       not of full length.  Eg, (and completely ignoring issues of 16-
19157       vs 32-bit insn length):
19158
19159              ite cond
19160              insn1
19161              insn2
19162       label: insn3
19163              insn4
19164
19165       The 'it' only conditionalises insn1 and insn2.  However, the
19166       current analysis is conservative and considers insn3 and insn4
19167       also possibly guarded.  Hence if 'label:' is the start of a hot
19168       loop we will get a big performance hit.
19169    */
19170    {
19171       /* Summary result of this analysis: False == safe but
19172          suboptimal. */
19173       vassert(guaranteedUnconditional == False);
19174
19175       UInt pc = guest_R15_curr_instr_notENC;
19176       vassert(0 == (pc & 1));
19177
19178       UInt pageoff = pc & 0xFFF;
19179       if (pageoff >= 18) {
19180          /* It's safe to poke about in the 9 halfwords preceding this
19181             insn.  So, have a look at them. */
19182          guaranteedUnconditional = True; /* assume no 'it' insn found,
19183                                             till we do */
19184          UShort* hwp = (UShort*)(HWord)pc;
19185          Int i;
19186          for (i = -1; i >= -9; i--) {
19187             /* We're in the same page.  (True, but commented out due
19188                to expense.) */
19189             /*
19190             vassert( ( ((UInt)(&hwp[i])) & 0xFFFFF000 )
19191                       == ( pc & 0xFFFFF000 ) );
19192             */
19193             /* All valid IT instructions must have the form 0xBFxy,
19194                where x can be anything, but y must be nonzero.  Find
19195                the number of insns covered by it (1 .. 4) and check to
19196                see if it can possibly reach up to the instruction in
19197                question.  Some (x,y) combinations mean UNPREDICTABLE,
19198                and the table is constructed to be conservative by
19199                returning 4 for those cases, so the analysis is safe
19200                even if the code uses unpredictable IT instructions (in
19201                which case its authors are nuts, but hey.)  */
19202             UShort hwp_i = hwp[i];
19203             if (UNLIKELY((hwp_i & 0xFF00) == 0xBF00 && (hwp_i & 0xF) != 0)) {
19204                /* might be an 'it' insn. */
19205                /* # guarded insns */
19206                Int n_guarded = (Int)it_length_table[hwp_i & 0xFF];
19207                vassert(n_guarded >= 1 && n_guarded <= 4);
19208                if (n_guarded * 2 /* # guarded HWs, worst case */
19209                    > (-(i+1)))   /* -(i+1): # remaining HWs after the IT */
19210                    /* -(i+0) also seems to work, even though I think
19211                       it's wrong.  I don't understand that. */
19212                   guaranteedUnconditional = False;
19213                break;
19214             }
19215          }
19216       }
19217    }
19218    /* --- END ITxxx optimisation analysis --- */
19219
19220    /* Generate the guarding condition for this insn, by examining
19221       ITSTATE.  Assign it to condT.  Also, generate new
19222       values for ITSTATE ready for stuffing back into the
19223       guest state, but don't actually do the Put yet, since it will
19224       need to stuffed back in only after the instruction gets to a
19225       point where it is sure to complete.  Mostly we let the code at
19226       decode_success handle this, but in cases where the insn contains
19227       a side exit, we have to update them before the exit. */
19228
19229    /* If the ITxxx optimisation analysis above could not prove that
19230       this instruction is guaranteed unconditional, we insert a
19231       lengthy IR preamble to compute the guarding condition at
19232       runtime.  If it can prove it (which obviously we hope is the
19233       normal case) then we insert a minimal preamble, which is
19234       equivalent to setting guest_ITSTATE to zero and then folding
19235       that through the full preamble (which completely disappears). */
19236
19237    IRTemp condT              = IRTemp_INVALID;
19238    IRTemp cond_AND_notInIT_T = IRTemp_INVALID;
19239
19240    IRTemp new_itstate        = IRTemp_INVALID;
19241    vassert(old_itstate == IRTemp_INVALID);
19242
19243    if (guaranteedUnconditional) {
19244       /* BEGIN "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
19245
19246       // ITSTATE = 0 :: I32
19247       IRTemp z32 = newTemp(Ity_I32);
19248       assign(z32, mkU32(0));
19249       put_ITSTATE(z32);
19250
19251       // old_itstate = 0 :: I32
19252       //
19253       // old_itstate = get_ITSTATE();
19254       old_itstate = z32; /* 0 :: I32 */
19255
19256       // new_itstate = old_itstate >> 8
19257       //             = 0 >> 8
19258       //             = 0 :: I32
19259       //
19260       // new_itstate = newTemp(Ity_I32);
19261       // assign(new_itstate,
19262       //        binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
19263       new_itstate = z32;
19264
19265       // ITSTATE = 0 :: I32(again)
19266       //
19267       // put_ITSTATE(new_itstate);
19268
19269       // condT1 = calc_cond_dyn( xor(and(old_istate,0xF0), 0xE0) )
19270       //        = calc_cond_dyn( xor(0,0xE0) )
19271       //        = calc_cond_dyn ( 0xE0 )
19272       //        = 1 :: I32
19273       // Not that this matters, since the computed value is not used:
19274       // see condT folding below
19275       //
19276       // IRTemp condT1 = newTemp(Ity_I32);
19277       // assign(condT1,
19278       //        mk_armg_calculate_condition_dyn(
19279       //           binop(Iop_Xor32,
19280       //                 binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
19281       //                 mkU32(0xE0))
19282       //       )
19283       // );
19284
19285       // condT = 32to8(and32(old_itstate,0xF0)) == 0  ? 1  : condT1
19286       //       = 32to8(and32(0,0xF0)) == 0  ? 1  : condT1
19287       //       = 32to8(0) == 0  ? 1  : condT1
19288       //       = 0 == 0  ? 1  : condT1
19289       //       = 1
19290       //
19291       // condT = newTemp(Ity_I32);
19292       // assign(condT, IRExpr_ITE(
19293       //                  unop(Iop_32to8, binop(Iop_And32,
19294       //                                        mkexpr(old_itstate),
19295       //                                        mkU32(0xF0))),
19296       //                  mkexpr(condT1),
19297       //                  mkU32(1))
19298       //       ));
19299       condT = newTemp(Ity_I32);
19300       assign(condT, mkU32(1));
19301
19302       // notInITt = xor32(and32(old_itstate, 1), 1)
19303       //          = xor32(and32(0, 1), 1)
19304       //          = xor32(0, 1)
19305       //          = 1 :: I32
19306       //
19307       // IRTemp notInITt = newTemp(Ity_I32);
19308       // assign(notInITt,
19309       //        binop(Iop_Xor32,
19310       //              binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
19311       //              mkU32(1)));
19312
19313       // cond_AND_notInIT_T = and32(notInITt, condT)
19314       //                    = and32(1, 1)
19315       //                    = 1
19316       //
19317       // cond_AND_notInIT_T = newTemp(Ity_I32);
19318       // assign(cond_AND_notInIT_T,
19319       //        binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
19320       cond_AND_notInIT_T = condT; /* 1 :: I32 */
19321
19322       /* END "partial eval { ITSTATE = 0; STANDARD_PREAMBLE; }" */
19323    } else {
19324       /* BEGIN { STANDARD PREAMBLE; } */
19325
19326       old_itstate = get_ITSTATE();
19327
19328       new_itstate = newTemp(Ity_I32);
19329       assign(new_itstate,
19330              binop(Iop_Shr32, mkexpr(old_itstate), mkU8(8)));
19331
19332       put_ITSTATE(new_itstate);
19333
19334       /* Same strategy as for ARM insns: generate a condition
19335          temporary at this point (or IRTemp_INVALID, meaning
19336          unconditional).  We leave it to lower-level instruction
19337          decoders to decide whether they can generate straight-line
19338          code, or whether they must generate a side exit before the
19339          instruction.  condT :: Ity_I32 and is always either zero or
19340          one. */
19341       IRTemp condT1 = newTemp(Ity_I32);
19342       assign(condT1,
19343              mk_armg_calculate_condition_dyn(
19344                 binop(Iop_Xor32,
19345                       binop(Iop_And32, mkexpr(old_itstate), mkU32(0xF0)),
19346                       mkU32(0xE0))
19347             )
19348       );
19349
19350       /* This is a bit complex, but needed to make Memcheck understand
19351          that, if the condition in old_itstate[7:4] denotes AL (that
19352          is, if this instruction is to be executed unconditionally),
19353          then condT does not depend on the results of calling the
19354          helper.
19355
19356          We test explicitly for old_itstate[7:4] == AL ^ 0xE, and in
19357          that case set condT directly to 1.  Else we use the results
19358          of the helper.  Since old_itstate is always defined and
19359          because Memcheck does lazy V-bit propagation through ITE,
19360          this will cause condT to always be a defined 1 if the
19361          condition is 'AL'.  From an execution semantics point of view
19362          this is irrelevant since we're merely duplicating part of the
19363          behaviour of the helper.  But it makes it clear to Memcheck,
19364          in this case, that condT does not in fact depend on the
19365          contents of the condition code thunk.  Without it, we get
19366          quite a lot of false errors.
19367
19368          So, just to clarify: from a straight semantics point of view,
19369          we can simply do "assign(condT, mkexpr(condT1))", and the
19370          simulator still runs fine.  It's just that we get loads of
19371          false errors from Memcheck. */
19372       condT = newTemp(Ity_I32);
19373       assign(condT, IRExpr_ITE(
19374                        binop(Iop_CmpNE32, binop(Iop_And32,
19375                                                 mkexpr(old_itstate),
19376                                                 mkU32(0xF0)),
19377                                           mkU32(0)),
19378                        mkexpr(condT1),
19379                        mkU32(1)
19380             ));
19381
19382       /* Something we don't have in ARM: generate a 0 or 1 value
19383          indicating whether or not we are in an IT block (NB: 0 = in
19384          IT block, 1 = not in IT block).  This is used to gate
19385          condition code updates in 16-bit Thumb instructions. */
19386       IRTemp notInITt = newTemp(Ity_I32);
19387       assign(notInITt,
19388              binop(Iop_Xor32,
19389                    binop(Iop_And32, mkexpr(old_itstate), mkU32(1)),
19390                    mkU32(1)));
19391
19392       /* Compute 'condT && notInITt' -- that is, the instruction is
19393          going to execute, and we're not in an IT block.  This is the
19394          gating condition for updating condition codes in 16-bit Thumb
19395          instructions, except for CMP, CMN and TST. */
19396       cond_AND_notInIT_T = newTemp(Ity_I32);
19397       assign(cond_AND_notInIT_T,
19398              binop(Iop_And32, mkexpr(notInITt), mkexpr(condT)));
19399       /* END { STANDARD PREAMBLE; } */
19400    }
19401
19402
19403    /* At this point:
19404       * ITSTATE has been updated
19405       * condT holds the guarding condition for this instruction (0 or 1),
19406       * notInITt is 1 if we're in "normal" code, 0 if in an IT block
19407       * cond_AND_notInIT_T is the AND of the above two.
19408
19409       If the instruction proper can't trap, then there's nothing else
19410       to do w.r.t. ITSTATE -- just go and and generate IR for the
19411       insn, taking into account the guarding condition.
19412
19413       If, however, the instruction might trap, then we must back up
19414       ITSTATE to the old value, and re-update it after the potentially
19415       trapping IR section.  A trap can happen either via a memory
19416       reference or because we need to throw SIGILL.
19417
19418       If an instruction has a side exit, we need to be sure that any
19419       ITSTATE backup is re-updated before the side exit.
19420    */
19421
19422    /* ----------------------------------------------------------- */
19423    /* --                                                       -- */
19424    /* -- Thumb 16-bit integer instructions                     -- */
19425    /* --                                                       -- */
19426    /* -- IMPORTANT: references to insn1 or INSN1 are           -- */
19427    /* --            not allowed in this section                -- */
19428    /* --                                                       -- */
19429    /* ----------------------------------------------------------- */
19430
19431    /* 16-bit instructions inside an IT block, apart from CMP, CMN and
19432       TST, do not set the condition codes.  Hence we must dynamically
19433       test for this case for every condition code update. */
19434
19435    IROp   anOp   = Iop_INVALID;
19436    const HChar* anOpNm = NULL;
19437
19438    /* ================ 16-bit 15:6 cases ================ */
19439
19440    switch (INSN0(15,6)) {
19441
19442    case 0x10a:   // CMP
19443    case 0x10b: { // CMN
19444       /* ---------------- CMP Rn, Rm ---------------- */
19445       Bool   isCMN = INSN0(15,6) == 0x10b;
19446       UInt   rN    = INSN0(2,0);
19447       UInt   rM    = INSN0(5,3);
19448       IRTemp argL  = newTemp(Ity_I32);
19449       IRTemp argR  = newTemp(Ity_I32);
19450       assign( argL, getIRegT(rN) );
19451       assign( argR, getIRegT(rM) );
19452       /* Update flags regardless of whether in an IT block or not. */
19453       setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
19454                       argL, argR, condT );
19455       DIP("%s r%u, r%u\n", isCMN ? "cmn" : "cmp", rN, rM);
19456       goto decode_success;
19457    }
19458
19459    case 0x108: {
19460       /* ---------------- TST Rn, Rm ---------------- */
19461       UInt   rN   = INSN0(2,0);
19462       UInt   rM   = INSN0(5,3);
19463       IRTemp oldC = newTemp(Ity_I32);
19464       IRTemp oldV = newTemp(Ity_I32);
19465       IRTemp res  = newTemp(Ity_I32);
19466       assign( oldC, mk_armg_calculate_flag_c() );
19467       assign( oldV, mk_armg_calculate_flag_v() );
19468       assign( res,  binop(Iop_And32, getIRegT(rN), getIRegT(rM)) );
19469       /* Update flags regardless of whether in an IT block or not. */
19470       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
19471       DIP("tst r%u, r%u\n", rN, rM);
19472       goto decode_success;
19473    }
19474
19475    case 0x109: {
19476       /* ---------------- NEGS Rd, Rm ---------------- */
19477       /* Rd = -Rm */
19478       UInt   rM   = INSN0(5,3);
19479       UInt   rD   = INSN0(2,0);
19480       IRTemp arg  = newTemp(Ity_I32);
19481       IRTemp zero = newTemp(Ity_I32);
19482       assign(arg, getIRegT(rM));
19483       assign(zero, mkU32(0));
19484       // rD can never be r15
19485       putIRegT(rD, binop(Iop_Sub32, mkexpr(zero), mkexpr(arg)), condT);
19486       setFlags_D1_D2( ARMG_CC_OP_SUB, zero, arg, cond_AND_notInIT_T);
19487       DIP("negs r%u, r%u\n", rD, rM);
19488       goto decode_success;
19489    }
19490
19491    case 0x10F: {
19492       /* ---------------- MVNS Rd, Rm ---------------- */
19493       /* Rd = ~Rm */
19494       UInt   rM   = INSN0(5,3);
19495       UInt   rD   = INSN0(2,0);
19496       IRTemp oldV = newTemp(Ity_I32);
19497       IRTemp oldC = newTemp(Ity_I32);
19498       IRTemp res  = newTemp(Ity_I32);
19499       assign( oldV, mk_armg_calculate_flag_v() );
19500       assign( oldC, mk_armg_calculate_flag_c() );
19501       assign(res, unop(Iop_Not32, getIRegT(rM)));
19502       // rD can never be r15
19503       putIRegT(rD, mkexpr(res), condT);
19504       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19505                          cond_AND_notInIT_T );
19506       DIP("mvns r%u, r%u\n", rD, rM);
19507       goto decode_success;
19508    }
19509
19510    case 0x10C:
19511       /* ---------------- ORRS Rd, Rm ---------------- */
19512       anOp = Iop_Or32; anOpNm = "orr"; goto and_orr_eor_mul;
19513    case 0x100:
19514       /* ---------------- ANDS Rd, Rm ---------------- */
19515       anOp = Iop_And32; anOpNm = "and"; goto and_orr_eor_mul;
19516    case 0x101:
19517       /* ---------------- EORS Rd, Rm ---------------- */
19518       anOp = Iop_Xor32; anOpNm = "eor"; goto and_orr_eor_mul;
19519    case 0x10d:
19520       /* ---------------- MULS Rd, Rm ---------------- */
19521       anOp = Iop_Mul32; anOpNm = "mul"; goto and_orr_eor_mul;
19522    and_orr_eor_mul: {
19523       /* Rd = Rd `op` Rm */
19524       UInt   rM   = INSN0(5,3);
19525       UInt   rD   = INSN0(2,0);
19526       IRTemp res  = newTemp(Ity_I32);
19527       IRTemp oldV = newTemp(Ity_I32);
19528       IRTemp oldC = newTemp(Ity_I32);
19529       assign( oldV, mk_armg_calculate_flag_v() );
19530       assign( oldC, mk_armg_calculate_flag_c() );
19531       assign( res, binop(anOp, getIRegT(rD), getIRegT(rM) ));
19532       // not safe to read guest state after here
19533       // rD can never be r15
19534       putIRegT(rD, mkexpr(res), condT);
19535       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19536                          cond_AND_notInIT_T );
19537       DIP("%s r%u, r%u\n", anOpNm, rD, rM);
19538       goto decode_success;
19539    }
19540
19541    case 0x10E: {
19542       /* ---------------- BICS Rd, Rm ---------------- */
19543       /* Rd = Rd & ~Rm */
19544       UInt   rM   = INSN0(5,3);
19545       UInt   rD   = INSN0(2,0);
19546       IRTemp res  = newTemp(Ity_I32);
19547       IRTemp oldV = newTemp(Ity_I32);
19548       IRTemp oldC = newTemp(Ity_I32);
19549       assign( oldV, mk_armg_calculate_flag_v() );
19550       assign( oldC, mk_armg_calculate_flag_c() );
19551       assign( res, binop(Iop_And32, getIRegT(rD),
19552                                     unop(Iop_Not32, getIRegT(rM) )));
19553       // not safe to read guest state after here
19554       // rD can never be r15
19555       putIRegT(rD, mkexpr(res), condT);
19556       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
19557                          cond_AND_notInIT_T );
19558       DIP("bics r%u, r%u\n", rD, rM);
19559       goto decode_success;
19560    }
19561
19562    case 0x105: {
19563       /* ---------------- ADCS Rd, Rm ---------------- */
19564       /* Rd = Rd + Rm + oldC */
19565       UInt   rM   = INSN0(5,3);
19566       UInt   rD   = INSN0(2,0);
19567       IRTemp argL = newTemp(Ity_I32);
19568       IRTemp argR = newTemp(Ity_I32);
19569       IRTemp oldC = newTemp(Ity_I32);
19570       IRTemp res  = newTemp(Ity_I32);
19571       assign(argL, getIRegT(rD));
19572       assign(argR, getIRegT(rM));
19573       assign(oldC, mk_armg_calculate_flag_c());
19574       assign(res, binop(Iop_Add32,
19575                         binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
19576                         mkexpr(oldC)));
19577       // rD can never be r15
19578       putIRegT(rD, mkexpr(res), condT);
19579       setFlags_D1_D2_ND( ARMG_CC_OP_ADC, argL, argR, oldC,
19580                          cond_AND_notInIT_T );
19581       DIP("adcs r%u, r%u\n", rD, rM);
19582       goto decode_success;
19583    }
19584
19585    case 0x106: {
19586       /* ---------------- SBCS Rd, Rm ---------------- */
19587       /* Rd = Rd - Rm - (oldC ^ 1) */
19588       UInt   rM   = INSN0(5,3);
19589       UInt   rD   = INSN0(2,0);
19590       IRTemp argL = newTemp(Ity_I32);
19591       IRTemp argR = newTemp(Ity_I32);
19592       IRTemp oldC = newTemp(Ity_I32);
19593       IRTemp res  = newTemp(Ity_I32);
19594       assign(argL, getIRegT(rD));
19595       assign(argR, getIRegT(rM));
19596       assign(oldC, mk_armg_calculate_flag_c());
19597       assign(res, binop(Iop_Sub32,
19598                         binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
19599                         binop(Iop_Xor32, mkexpr(oldC), mkU32(1))));
19600       // rD can never be r15
19601       putIRegT(rD, mkexpr(res), condT);
19602       setFlags_D1_D2_ND( ARMG_CC_OP_SBB, argL, argR, oldC,
19603                          cond_AND_notInIT_T );
19604       DIP("sbcs r%u, r%u\n", rD, rM);
19605       goto decode_success;
19606    }
19607
19608    case 0x2CB: {
19609       /* ---------------- UXTB Rd, Rm ---------------- */
19610       /* Rd = 8Uto32(Rm) */
19611       UInt rM = INSN0(5,3);
19612       UInt rD = INSN0(2,0);
19613       putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFF)),
19614                    condT);
19615       DIP("uxtb r%u, r%u\n", rD, rM);
19616       goto decode_success;
19617    }
19618
19619    case 0x2C9: {
19620       /* ---------------- SXTB Rd, Rm ---------------- */
19621       /* Rd = 8Sto32(Rm) */
19622       UInt rM = INSN0(5,3);
19623       UInt rD = INSN0(2,0);
19624       putIRegT(rD, binop(Iop_Sar32,
19625                          binop(Iop_Shl32, getIRegT(rM), mkU8(24)),
19626                          mkU8(24)),
19627                    condT);
19628       DIP("sxtb r%u, r%u\n", rD, rM);
19629       goto decode_success;
19630    }
19631
19632    case 0x2CA: {
19633       /* ---------------- UXTH Rd, Rm ---------------- */
19634       /* Rd = 16Uto32(Rm) */
19635       UInt rM = INSN0(5,3);
19636       UInt rD = INSN0(2,0);
19637       putIRegT(rD, binop(Iop_And32, getIRegT(rM), mkU32(0xFFFF)),
19638                    condT);
19639       DIP("uxth r%u, r%u\n", rD, rM);
19640       goto decode_success;
19641    }
19642
19643    case 0x2C8: {
19644       /* ---------------- SXTH Rd, Rm ---------------- */
19645       /* Rd = 16Sto32(Rm) */
19646       UInt rM = INSN0(5,3);
19647       UInt rD = INSN0(2,0);
19648       putIRegT(rD, binop(Iop_Sar32,
19649                          binop(Iop_Shl32, getIRegT(rM), mkU8(16)),
19650                          mkU8(16)),
19651                    condT);
19652       DIP("sxth r%u, r%u\n", rD, rM);
19653       goto decode_success;
19654    }
19655
19656    case 0x102:   // LSLS
19657    case 0x103:   // LSRS
19658    case 0x104:   // ASRS
19659    case 0x107: { // RORS
19660       /* ---------------- LSLS Rs, Rd ---------------- */
19661       /* ---------------- LSRS Rs, Rd ---------------- */
19662       /* ---------------- ASRS Rs, Rd ---------------- */
19663       /* ---------------- RORS Rs, Rd ---------------- */
19664       /* Rd = Rd `op` Rs, and set flags */
19665       UInt   rS   = INSN0(5,3);
19666       UInt   rD   = INSN0(2,0);
19667       IRTemp oldV = newTemp(Ity_I32);
19668       IRTemp rDt  = newTemp(Ity_I32);
19669       IRTemp rSt  = newTemp(Ity_I32);
19670       IRTemp res  = newTemp(Ity_I32);
19671       IRTemp resC = newTemp(Ity_I32);
19672       const HChar* wot  = "???";
19673       assign(rSt, getIRegT(rS));
19674       assign(rDt, getIRegT(rD));
19675       assign(oldV, mk_armg_calculate_flag_v());
19676       /* Does not appear to be the standard 'how' encoding. */
19677       switch (INSN0(15,6)) {
19678          case 0x102:
19679             compute_result_and_C_after_LSL_by_reg(
19680                dis_buf, &res, &resC, rDt, rSt, rD, rS
19681             );
19682             wot = "lsl";
19683             break;
19684          case 0x103:
19685             compute_result_and_C_after_LSR_by_reg(
19686                dis_buf, &res, &resC, rDt, rSt, rD, rS
19687             );
19688             wot = "lsr";
19689             break;
19690          case 0x104:
19691             compute_result_and_C_after_ASR_by_reg(
19692                dis_buf, &res, &resC, rDt, rSt, rD, rS
19693             );
19694             wot = "asr";
19695             break;
19696          case 0x107:
19697             compute_result_and_C_after_ROR_by_reg(
19698                dis_buf, &res, &resC, rDt, rSt, rD, rS
19699             );
19700             wot = "ror";
19701             break;
19702          default:
19703             /*NOTREACHED*/vassert(0);
19704       }
19705       // not safe to read guest state after this point
19706       putIRegT(rD, mkexpr(res), condT);
19707       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
19708                          cond_AND_notInIT_T );
19709       DIP("%ss r%u, r%u\n", wot, rS, rD);
19710       goto decode_success;
19711    }
19712
19713    case 0x2E8:   // REV
19714    case 0x2E9: { // REV16
19715       /* ---------------- REV   Rd, Rm ---------------- */
19716       /* ---------------- REV16 Rd, Rm ---------------- */
19717       UInt rM = INSN0(5,3);
19718       UInt rD = INSN0(2,0);
19719       Bool isREV = INSN0(15,6) == 0x2E8;
19720       IRTemp arg = newTemp(Ity_I32);
19721       assign(arg, getIRegT(rM));
19722       IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
19723       putIRegT(rD, mkexpr(res), condT);
19724       DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM);
19725       goto decode_success;
19726    }
19727
19728    case 0x2EB: { // REVSH
19729       /* ---------------- REVSH Rd, Rn ---------------- */
19730       UInt rM = INSN0(5,3);
19731       UInt rD = INSN0(2,0);
19732       IRTemp irt_rM  = newTemp(Ity_I32);
19733       IRTemp irt_hi  = newTemp(Ity_I32);
19734       IRTemp irt_low = newTemp(Ity_I32);
19735       IRTemp irt_res = newTemp(Ity_I32);
19736       assign(irt_rM, getIRegT(rM));
19737       assign(irt_hi,
19738              binop(Iop_Sar32,
19739                    binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
19740                    mkU8(16)
19741              )
19742       );
19743       assign(irt_low,
19744              binop(Iop_And32,
19745                    binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
19746                    mkU32(0xFF)
19747              )
19748       );
19749       assign(irt_res,
19750              binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
19751       );
19752       putIRegT(rD, mkexpr(irt_res), condT);
19753       DIP("revsh r%u, r%u\n", rD, rM);
19754       goto decode_success;
19755    }
19756
19757    default:
19758       break; /* examine the next shortest prefix */
19759
19760    }
19761
19762
19763    /* ================ 16-bit 15:7 cases ================ */
19764
19765    switch (INSN0(15,7)) {
19766
19767    case BITS9(1,0,1,1,0,0,0,0,0): {
19768       /* ------------ ADD SP, #imm7 * 4 ------------ */
19769       UInt uimm7 = INSN0(6,0);
19770       putIRegT(13, binop(Iop_Add32, getIRegT(13), mkU32(uimm7 * 4)),
19771                    condT);
19772       DIP("add sp, #%u\n", uimm7 * 4);
19773       goto decode_success;
19774    }
19775
19776    case BITS9(1,0,1,1,0,0,0,0,1): {
19777       /* ------------ SUB SP, #imm7 * 4 ------------ */
19778       UInt uimm7 = INSN0(6,0);
19779       putIRegT(13, binop(Iop_Sub32, getIRegT(13), mkU32(uimm7 * 4)),
19780                    condT);
19781       DIP("sub sp, #%u\n", uimm7 * 4);
19782       goto decode_success;
19783    }
19784
19785    case BITS9(0,1,0,0,0,1,1,1,0): {
19786       /* ---------------- BX rM ---------------- */
19787       /* Branch to reg, and optionally switch modes.  Reg contains a
19788          suitably encoded address therefore (w CPSR.T at the bottom).
19789          Have to special-case r15, as usual. */
19790       UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
19791       if (BITS3(0,0,0) == INSN0(2,0)) {
19792          IRTemp dst = newTemp(Ity_I32);
19793          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19794          mk_skip_over_T16_if_cond_is_false(condT);
19795          condT = IRTemp_INVALID;
19796          // now uncond
19797          if (rM <= 14) {
19798             assign( dst, getIRegT(rM) );
19799          } else {
19800             vassert(rM == 15);
19801             assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
19802          }
19803          llPutIReg(15, mkexpr(dst));
19804          dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
19805          dres.whatNext    = Dis_StopHere;
19806          DIP("bx r%u (possibly switch to ARM mode)\n", rM);
19807          goto decode_success;
19808       }
19809       break;
19810    }
19811
19812    /* ---------------- BLX rM ---------------- */
19813    /* Branch and link to interworking address in rM. */
19814    case BITS9(0,1,0,0,0,1,1,1,1): {
19815       if (BITS3(0,0,0) == INSN0(2,0)) {
19816          UInt rM = (INSN0(6,6) << 3) | INSN0(5,3);
19817          IRTemp dst = newTemp(Ity_I32);
19818          if (rM <= 14) {
19819             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19820             mk_skip_over_T16_if_cond_is_false(condT);
19821             condT = IRTemp_INVALID;
19822             // now uncond
19823             /* We're returning to Thumb code, hence "| 1" */
19824             assign( dst, getIRegT(rM) );
19825             putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
19826                           IRTemp_INVALID );
19827             llPutIReg(15, mkexpr(dst));
19828             dres.jk_StopHere = Ijk_Call;
19829             dres.whatNext    = Dis_StopHere;
19830             DIP("blx r%u (possibly switch to ARM mode)\n", rM);
19831             goto decode_success;
19832          }
19833          /* else unpredictable, fall through */
19834       }
19835       break;
19836    }
19837
19838    default:
19839       break; /* examine the next shortest prefix */
19840
19841    }
19842
19843
19844    /* ================ 16-bit 15:8 cases ================ */
19845
19846    switch (INSN0(15,8)) {
19847
19848    case BITS8(1,1,0,1,1,1,1,1): {
19849       /* ---------------- SVC ---------------- */
19850       UInt imm8 = INSN0(7,0);
19851       if (imm8 == 0) {
19852          /* A syscall.  We can't do this conditionally, hence: */
19853          mk_skip_over_T16_if_cond_is_false( condT );
19854          // FIXME: what if we have to back up and restart this insn?
19855          // then ITSTATE will be wrong (we'll have it as "used")
19856          // when it isn't.  Correct is to save ITSTATE in a
19857          // stash pseudo-reg, and back up from that if we have to
19858          // restart.
19859          // uncond after here
19860          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
19861          dres.jk_StopHere = Ijk_Sys_syscall;
19862          dres.whatNext    = Dis_StopHere;
19863          DIP("svc #0x%08x\n", imm8);
19864          goto decode_success;
19865       }
19866       /* else fall through */
19867       break;
19868    }
19869
19870    case BITS8(0,1,0,0,0,1,0,0): {
19871       /* ---------------- ADD(HI) Rd, Rm ---------------- */
19872       UInt h1 = INSN0(7,7);
19873       UInt h2 = INSN0(6,6);
19874       UInt rM = (h2 << 3) | INSN0(5,3);
19875       UInt rD = (h1 << 3) | INSN0(2,0);
19876       //if (h1 == 0 && h2 == 0) { // Original T1 was more restrictive
19877       if (rD == 15 && rM == 15) {
19878          // then it's invalid
19879       } else {
19880          IRTemp res = newTemp(Ity_I32);
19881          assign( res, binop(Iop_Add32, getIRegT(rD), getIRegT(rM) ));
19882          if (rD != 15) {
19883             putIRegT( rD, mkexpr(res), condT );
19884          } else {
19885             /* Only allowed outside or last-in IT block; SIGILL if not so. */
19886             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19887             /* jump over insn if not selected */
19888             mk_skip_over_T16_if_cond_is_false(condT);
19889             condT = IRTemp_INVALID;
19890             // now uncond
19891             /* non-interworking branch */
19892             llPutIReg(15, binop(Iop_Or32, mkexpr(res), mkU32(1)));
19893             dres.jk_StopHere = Ijk_Boring;
19894             dres.whatNext    = Dis_StopHere;
19895          }
19896          DIP("add(hi) r%u, r%u\n", rD, rM);
19897          goto decode_success;
19898       }
19899       break;
19900    }
19901
19902    case BITS8(0,1,0,0,0,1,0,1): {
19903       /* ---------------- CMP(HI) Rd, Rm ---------------- */
19904       UInt h1 = INSN0(7,7);
19905       UInt h2 = INSN0(6,6);
19906       UInt rM = (h2 << 3) | INSN0(5,3);
19907       UInt rN = (h1 << 3) | INSN0(2,0);
19908       if (h1 != 0 || h2 != 0) {
19909          IRTemp argL  = newTemp(Ity_I32);
19910          IRTemp argR  = newTemp(Ity_I32);
19911          assign( argL, getIRegT(rN) );
19912          assign( argR, getIRegT(rM) );
19913          /* Update flags regardless of whether in an IT block or not. */
19914          setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
19915          DIP("cmphi r%u, r%u\n", rN, rM);
19916          goto decode_success;
19917       }
19918       break;
19919    }
19920
19921    case BITS8(0,1,0,0,0,1,1,0): {
19922       /* ---------------- MOV(HI) Rd, Rm ---------------- */
19923       UInt h1 = INSN0(7,7);
19924       UInt h2 = INSN0(6,6);
19925       UInt rM = (h2 << 3) | INSN0(5,3);
19926       UInt rD = (h1 << 3) | INSN0(2,0);
19927       /* The old ARM ARM seems to disallow the case where both Rd and
19928          Rm are "low" registers, but newer versions allow it. */
19929       if (1 /*h1 != 0 || h2 != 0*/) {
19930          IRTemp val = newTemp(Ity_I32);
19931          assign( val, getIRegT(rM) );
19932          if (rD != 15) {
19933             putIRegT( rD, mkexpr(val), condT );
19934          } else {
19935             /* Only allowed outside or last-in IT block; SIGILL if not so. */
19936             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
19937             /* jump over insn if not selected */
19938             mk_skip_over_T16_if_cond_is_false(condT);
19939             condT = IRTemp_INVALID;
19940             // now uncond
19941             /* non-interworking branch */
19942             llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
19943             dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
19944             dres.whatNext    = Dis_StopHere;
19945          }
19946          DIP("mov r%u, r%u\n", rD, rM);
19947          goto decode_success;
19948       }
19949       break;
19950    }
19951
19952    case BITS8(1,0,1,1,1,1,1,1): {
19953       /* ---------------- IT (if-then) ---------------- */
19954       UInt firstcond = INSN0(7,4);
19955       UInt mask = INSN0(3,0);
19956       UInt newITSTATE = 0;
19957       /* This is the ITSTATE represented as described in
19958          libvex_guest_arm.h.  It is not the ARM ARM representation. */
19959       HChar c1 = '.';
19960       HChar c2 = '.';
19961       HChar c3 = '.';
19962       Bool valid = compute_ITSTATE( &newITSTATE, &c1, &c2, &c3,
19963                                     firstcond, mask );
19964       if (valid && firstcond != 0xF/*NV*/) {
19965          /* Not allowed in an IT block; SIGILL if so. */
19966          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19967
19968          IRTemp t = newTemp(Ity_I32);
19969          assign(t, mkU32(newITSTATE));
19970          put_ITSTATE(t);
19971
19972          DIP("it%c%c%c %s\n", c1, c2, c3, nCC(firstcond));
19973          goto decode_success;
19974       }
19975       break;
19976    }
19977
19978    case BITS8(1,0,1,1,0,0,0,1):
19979    case BITS8(1,0,1,1,0,0,1,1):
19980    case BITS8(1,0,1,1,1,0,0,1):
19981    case BITS8(1,0,1,1,1,0,1,1): {
19982       /* ---------------- CB{N}Z ---------------- */
19983       UInt rN    = INSN0(2,0);
19984       UInt bOP   = INSN0(11,11);
19985       UInt imm32 = (INSN0(9,9) << 6) | (INSN0(7,3) << 1);
19986       gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
19987       /* It's a conditional branch forward. */
19988       IRTemp kond = newTemp(Ity_I1);
19989       assign( kond, binop(bOP ? Iop_CmpNE32 : Iop_CmpEQ32,
19990                           getIRegT(rN), mkU32(0)) );
19991
19992       vassert(0 == (guest_R15_curr_instr_notENC & 1));
19993       /* Looks like the nearest insn we can branch to is the one after
19994          next.  That makes sense, as there's no point in being able to
19995          encode a conditional branch to the next instruction. */
19996       UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
19997       stmt(IRStmt_Exit( mkexpr(kond),
19998                         Ijk_Boring,
19999                         IRConst_U32(toUInt(dst)),
20000                         OFFB_R15T ));
20001       DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
20002       goto decode_success;
20003    }
20004
20005    default:
20006       break; /* examine the next shortest prefix */
20007
20008    }
20009
20010
20011    /* ================ 16-bit 15:9 cases ================ */
20012
20013    switch (INSN0(15,9)) {
20014
20015    case BITS7(1,0,1,1,0,1,0): {
20016       /* ---------------- PUSH ---------------- */
20017       /* This is a bit like STMxx, but way simpler. Complications we
20018          don't have to deal with:
20019          * SP being one of the transferred registers
20020          * direction (increment vs decrement)
20021          * before-vs-after-ness
20022       */
20023       Int  i, nRegs;
20024       UInt bitR    = INSN0(8,8);
20025       UInt regList = INSN0(7,0);
20026       if (bitR) regList |= (1 << 14);
20027
20028       /* At least one register must be transferred, else result is
20029          UNPREDICTABLE. */
20030       if (regList != 0) {
20031          /* Since we can't generate a guaranteed non-trapping IR
20032             sequence, (1) jump over the insn if it is gated false, and
20033             (2) back out the ITSTATE update. */
20034          mk_skip_over_T16_if_cond_is_false(condT);
20035          condT = IRTemp_INVALID;
20036          put_ITSTATE(old_itstate);
20037          // now uncond
20038
20039          nRegs = 0;
20040          for (i = 0; i < 16; i++) {
20041             if ((regList & (1 << i)) != 0)
20042                nRegs++;
20043          }
20044          vassert(nRegs >= 1 && nRegs <= 9);
20045
20046          /* Move SP down first of all, so we're "covered".  And don't
20047             mess with its alignment. */
20048          IRTemp newSP = newTemp(Ity_I32);
20049          assign(newSP, binop(Iop_Sub32, getIRegT(13), mkU32(4 * nRegs)));
20050          putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
20051
20052          /* Generate a transfer base address as a forced-aligned
20053             version of the final SP value. */
20054          IRTemp base = newTemp(Ity_I32);
20055          assign(base, binop(Iop_And32, mkexpr(newSP), mkU32(~3)));
20056
20057          /* Now the transfers */
20058          nRegs = 0;
20059          for (i = 0; i < 16; i++) {
20060             if ((regList & (1 << i)) != 0) {
20061                storeLE( binop(Iop_Add32, mkexpr(base), mkU32(4 * nRegs)),
20062                         getIRegT(i) );
20063                nRegs++;
20064             }
20065          }
20066
20067          /* Reinstate the ITSTATE update. */
20068          put_ITSTATE(new_itstate);
20069
20070          DIP("push {%s0x%04x}\n", bitR ? "lr," : "", regList & 0xFF);
20071          goto decode_success;
20072       }
20073       break;
20074    }
20075
20076    case BITS7(1,0,1,1,1,1,0): {
20077       /* ---------------- POP ---------------- */
20078       Int  i, nRegs;
20079       UInt bitR    = INSN0(8,8);
20080       UInt regList = INSN0(7,0);
20081
20082       /* At least one register must be transferred, else result is
20083          UNPREDICTABLE. */
20084       if (regList != 0 || bitR) {
20085          /* Since we can't generate a guaranteed non-trapping IR
20086             sequence, (1) jump over the insn if it is gated false, and
20087             (2) back out the ITSTATE update. */
20088          mk_skip_over_T16_if_cond_is_false(condT);
20089          condT = IRTemp_INVALID;
20090          put_ITSTATE(old_itstate);
20091          // now uncond
20092
20093          nRegs = 0;
20094          for (i = 0; i < 8; i++) {
20095             if ((regList & (1 << i)) != 0)
20096                nRegs++;
20097          }
20098          vassert(nRegs >= 0 && nRegs <= 8);
20099          vassert(bitR == 0 || bitR == 1);
20100
20101          IRTemp oldSP = newTemp(Ity_I32);
20102          assign(oldSP, getIRegT(13));
20103
20104          /* Generate a transfer base address as a forced-aligned
20105             version of the original SP value. */
20106          IRTemp base = newTemp(Ity_I32);
20107          assign(base, binop(Iop_And32, mkexpr(oldSP), mkU32(~3)));
20108
20109          /* Compute a new value for SP, but don't install it yet, so
20110             that we're "covered" until all the transfers are done.
20111             And don't mess with its alignment. */
20112          IRTemp newSP = newTemp(Ity_I32);
20113          assign(newSP, binop(Iop_Add32, mkexpr(oldSP),
20114                                         mkU32(4 * (nRegs + bitR))));
20115
20116          /* Now the transfers, not including PC */
20117          nRegs = 0;
20118          for (i = 0; i < 8; i++) {
20119             if ((regList & (1 << i)) != 0) {
20120                putIRegT(i, loadLE( Ity_I32,
20121                                    binop(Iop_Add32, mkexpr(base),
20122                                                     mkU32(4 * nRegs))),
20123                            IRTemp_INVALID );
20124                nRegs++;
20125             }
20126          }
20127
20128          IRTemp newPC = IRTemp_INVALID;
20129          if (bitR) {
20130             newPC = newTemp(Ity_I32);
20131             assign( newPC, loadLE( Ity_I32,
20132                                    binop(Iop_Add32, mkexpr(base),
20133                                                     mkU32(4 * nRegs))));
20134          }
20135
20136          /* Now we can safely install the new SP value */
20137          putIRegT(13, mkexpr(newSP), IRTemp_INVALID);
20138
20139          /* Reinstate the ITSTATE update. */
20140          put_ITSTATE(new_itstate);
20141
20142          /* now, do we also have to do a branch?  If so, it turns out
20143             that the new PC value is encoded exactly as we need it to
20144             be -- with CPSR.T in the bottom bit.  So we can simply use
20145             it as is, no need to mess with it.  Note, therefore, this
20146             is an interworking return. */
20147          if (bitR) {
20148             llPutIReg(15, mkexpr(newPC));
20149             dres.jk_StopHere = Ijk_Ret;
20150             dres.whatNext    = Dis_StopHere;
20151          }
20152
20153          DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
20154          goto decode_success;
20155       }
20156       break;
20157    }
20158
20159    case BITS7(0,0,0,1,1,1,0):   /* ADDS */
20160    case BITS7(0,0,0,1,1,1,1): { /* SUBS */
20161       /* ---------------- ADDS Rd, Rn, #uimm3 ---------------- */
20162       /* ---------------- SUBS Rd, Rn, #uimm3 ---------------- */
20163       UInt   uimm3 = INSN0(8,6);
20164       UInt   rN    = INSN0(5,3);
20165       UInt   rD    = INSN0(2,0);
20166       UInt   isSub = INSN0(9,9);
20167       IRTemp argL  = newTemp(Ity_I32);
20168       IRTemp argR  = newTemp(Ity_I32);
20169       assign( argL, getIRegT(rN) );
20170       assign( argR, mkU32(uimm3) );
20171       putIRegT(rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
20172                          mkexpr(argL), mkexpr(argR)),
20173                    condT);
20174       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20175                       argL, argR, cond_AND_notInIT_T );
20176       DIP("%s r%u, r%u, #%u\n", isSub ? "subs" : "adds", rD, rN, uimm3);
20177       goto decode_success;
20178    }
20179
20180    case BITS7(0,0,0,1,1,0,0):   /* ADDS */
20181    case BITS7(0,0,0,1,1,0,1): { /* SUBS */
20182       /* ---------------- ADDS Rd, Rn, Rm ---------------- */
20183       /* ---------------- SUBS Rd, Rn, Rm ---------------- */
20184       UInt   rM    = INSN0(8,6);
20185       UInt   rN    = INSN0(5,3);
20186       UInt   rD    = INSN0(2,0);
20187       UInt   isSub = INSN0(9,9);
20188       IRTemp argL  = newTemp(Ity_I32);
20189       IRTemp argR  = newTemp(Ity_I32);
20190       assign( argL, getIRegT(rN) );
20191       assign( argR, getIRegT(rM) );
20192       putIRegT( rD, binop(isSub ? Iop_Sub32 : Iop_Add32,
20193                           mkexpr(argL), mkexpr(argR)),
20194                     condT );
20195       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20196                       argL, argR, cond_AND_notInIT_T );
20197       DIP("%s r%u, r%u, r%u\n", isSub ? "subs" : "adds", rD, rN, rM);
20198       goto decode_success;
20199    }
20200
20201    case BITS7(0,1,0,1,0,0,0):   /* STR */
20202    case BITS7(0,1,0,1,1,0,0): { /* LDR */
20203       /* ------------- LDR Rd, [Rn, Rm] ------------- */
20204       /* ------------- STR Rd, [Rn, Rm] ------------- */
20205       /* LDR/STR Rd, [Rn + Rm] */
20206       UInt    rD   = INSN0(2,0);
20207       UInt    rN   = INSN0(5,3);
20208       UInt    rM   = INSN0(8,6);
20209       UInt    isLD = INSN0(11,11);
20210
20211       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20212       put_ITSTATE(old_itstate); // backout
20213       if (isLD) {
20214          IRTemp tD = newTemp(Ity_I32);
20215          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20216          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20217       } else {
20218          storeGuardedLE(ea, getIRegT(rD), condT);
20219       }
20220       put_ITSTATE(new_itstate); // restore
20221
20222       DIP("%s r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20223       goto decode_success;
20224    }
20225
20226    case BITS7(0,1,0,1,0,0,1):
20227    case BITS7(0,1,0,1,1,0,1): {
20228       /* ------------- LDRH Rd, [Rn, Rm] ------------- */
20229       /* ------------- STRH Rd, [Rn, Rm] ------------- */
20230       /* LDRH/STRH Rd, [Rn + Rm] */
20231       UInt    rD   = INSN0(2,0);
20232       UInt    rN   = INSN0(5,3);
20233       UInt    rM   = INSN0(8,6);
20234       UInt    isLD = INSN0(11,11);
20235
20236       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20237       put_ITSTATE(old_itstate); // backout
20238       if (isLD) {
20239          IRTemp tD = newTemp(Ity_I32);
20240          loadGuardedLE(tD, ILGop_16Uto32, ea, llGetIReg(rD), condT);
20241          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20242       } else {
20243          storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
20244       }
20245       put_ITSTATE(new_itstate); // restore
20246
20247       DIP("%sh r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20248       goto decode_success;
20249    }
20250
20251    case BITS7(0,1,0,1,1,1,1): {
20252       /* ------------- LDRSH Rd, [Rn, Rm] ------------- */
20253       /* LDRSH Rd, [Rn + Rm] */
20254       UInt    rD = INSN0(2,0);
20255       UInt    rN = INSN0(5,3);
20256       UInt    rM = INSN0(8,6);
20257
20258       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20259       put_ITSTATE(old_itstate); // backout
20260       IRTemp tD = newTemp(Ity_I32);
20261       loadGuardedLE(tD, ILGop_16Sto32, ea, llGetIReg(rD), condT);
20262       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20263       put_ITSTATE(new_itstate); // restore
20264
20265       DIP("ldrsh r%u, [r%u, r%u]\n", rD, rN, rM);
20266       goto decode_success;
20267    }
20268
20269    case BITS7(0,1,0,1,0,1,1): {
20270       /* ------------- LDRSB Rd, [Rn, Rm] ------------- */
20271       /* LDRSB Rd, [Rn + Rm] */
20272       UInt    rD = INSN0(2,0);
20273       UInt    rN = INSN0(5,3);
20274       UInt    rM = INSN0(8,6);
20275
20276       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20277       put_ITSTATE(old_itstate); // backout
20278       IRTemp tD = newTemp(Ity_I32);
20279       loadGuardedLE(tD, ILGop_8Sto32, ea, llGetIReg(rD), condT);
20280       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20281       put_ITSTATE(new_itstate); // restore
20282
20283       DIP("ldrsb r%u, [r%u, r%u]\n", rD, rN, rM);
20284       goto decode_success;
20285    }
20286
20287    case BITS7(0,1,0,1,0,1,0):
20288    case BITS7(0,1,0,1,1,1,0): {
20289       /* ------------- LDRB Rd, [Rn, Rm] ------------- */
20290       /* ------------- STRB Rd, [Rn, Rm] ------------- */
20291       /* LDRB/STRB Rd, [Rn + Rm] */
20292       UInt    rD   = INSN0(2,0);
20293       UInt    rN   = INSN0(5,3);
20294       UInt    rM   = INSN0(8,6);
20295       UInt    isLD = INSN0(11,11);
20296
20297       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), getIRegT(rM));
20298       put_ITSTATE(old_itstate); // backout
20299       if (isLD) {
20300          IRTemp tD = newTemp(Ity_I32);
20301          loadGuardedLE(tD, ILGop_8Uto32, ea, llGetIReg(rD), condT);
20302          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20303       } else {
20304          storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
20305       }
20306       put_ITSTATE(new_itstate); // restore
20307
20308       DIP("%sb r%u, [r%u, r%u]\n", isLD ? "ldr" : "str", rD, rN, rM);
20309       goto decode_success;
20310    }
20311
20312    default:
20313       break; /* examine the next shortest prefix */
20314
20315    }
20316
20317
20318    /* ================ 16-bit 15:11 cases ================ */
20319
20320    switch (INSN0(15,11)) {
20321
20322    case BITS5(0,0,1,1,0):
20323    case BITS5(0,0,1,1,1): {
20324       /* ---------------- ADDS Rn, #uimm8 ---------------- */
20325       /* ---------------- SUBS Rn, #uimm8 ---------------- */
20326       UInt   isSub = INSN0(11,11);
20327       UInt   rN    = INSN0(10,8);
20328       UInt   uimm8 = INSN0(7,0);
20329       IRTemp argL  = newTemp(Ity_I32);
20330       IRTemp argR  = newTemp(Ity_I32);
20331       assign( argL, getIRegT(rN) );
20332       assign( argR, mkU32(uimm8) );
20333       putIRegT( rN, binop(isSub ? Iop_Sub32 : Iop_Add32,
20334                           mkexpr(argL), mkexpr(argR)), condT );
20335       setFlags_D1_D2( isSub ? ARMG_CC_OP_SUB : ARMG_CC_OP_ADD,
20336                       argL, argR, cond_AND_notInIT_T );
20337       DIP("%s r%u, #%u\n", isSub ? "subs" : "adds", rN, uimm8);
20338       goto decode_success;
20339    }
20340
20341    case BITS5(1,0,1,0,0): {
20342       /* ---------------- ADD rD, PC, #imm8 * 4 ---------------- */
20343       /* a.k.a. ADR */
20344       /* rD = align4(PC) + imm8 * 4 */
20345       UInt rD   = INSN0(10,8);
20346       UInt imm8 = INSN0(7,0);
20347       putIRegT(rD, binop(Iop_Add32,
20348                          binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20349                          mkU32(imm8 * 4)),
20350                    condT);
20351       DIP("add r%u, pc, #%u\n", rD, imm8 * 4);
20352       goto decode_success;
20353    }
20354
20355    case BITS5(1,0,1,0,1): {
20356       /* ---------------- ADD rD, SP, #imm8 * 4 ---------------- */
20357       UInt rD   = INSN0(10,8);
20358       UInt imm8 = INSN0(7,0);
20359       putIRegT(rD, binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4)),
20360                    condT);
20361       DIP("add r%u, r13, #%u\n", rD, imm8 * 4);
20362       goto decode_success;
20363    }
20364
20365    case BITS5(0,0,1,0,1): {
20366       /* ---------------- CMP Rn, #uimm8 ---------------- */
20367       UInt   rN    = INSN0(10,8);
20368       UInt   uimm8 = INSN0(7,0);
20369       IRTemp argL  = newTemp(Ity_I32);
20370       IRTemp argR  = newTemp(Ity_I32);
20371       assign( argL, getIRegT(rN) );
20372       assign( argR, mkU32(uimm8) );
20373       /* Update flags regardless of whether in an IT block or not. */
20374       setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
20375       DIP("cmp r%u, #%u\n", rN, uimm8);
20376       goto decode_success;
20377    }
20378
20379    case BITS5(0,0,1,0,0): {
20380       /* -------------- (T1) MOVS Rn, #uimm8 -------------- */
20381       UInt   rD    = INSN0(10,8);
20382       UInt   uimm8 = INSN0(7,0);
20383       IRTemp oldV  = newTemp(Ity_I32);
20384       IRTemp oldC  = newTemp(Ity_I32);
20385       IRTemp res   = newTemp(Ity_I32);
20386       assign( oldV, mk_armg_calculate_flag_v() );
20387       assign( oldC, mk_armg_calculate_flag_c() );
20388       assign( res, mkU32(uimm8) );
20389       putIRegT(rD, mkexpr(res), condT);
20390       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
20391                          cond_AND_notInIT_T );
20392       DIP("movs r%u, #%u\n", rD, uimm8);
20393       goto decode_success;
20394    }
20395
20396    case BITS5(0,1,0,0,1): {
20397       /* ------------- LDR Rd, [PC, #imm8 * 4] ------------- */
20398       /* LDR Rd, [align4(PC) + imm8 * 4] */
20399       UInt   rD   = INSN0(10,8);
20400       UInt   imm8 = INSN0(7,0);
20401       IRTemp ea   = newTemp(Ity_I32);
20402
20403       assign(ea, binop(Iop_Add32,
20404                        binop(Iop_And32, getIRegT(15), mkU32(~3U)),
20405                        mkU32(imm8 * 4)));
20406       put_ITSTATE(old_itstate); // backout
20407       IRTemp tD = newTemp(Ity_I32);
20408       loadGuardedLE( tD, ILGop_Ident32, mkexpr(ea), llGetIReg(rD), condT );
20409       putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20410       put_ITSTATE(new_itstate); // restore
20411
20412       DIP("ldr r%u, [pc, #%u]\n", rD, imm8 * 4);
20413       goto decode_success;
20414    }
20415
20416    case BITS5(0,1,1,0,0):   /* STR */
20417    case BITS5(0,1,1,0,1): { /* LDR */
20418       /* ------------- LDR Rd, [Rn, #imm5 * 4] ------------- */
20419       /* ------------- STR Rd, [Rn, #imm5 * 4] ------------- */
20420       /* LDR/STR Rd, [Rn + imm5 * 4] */
20421       UInt    rD   = INSN0(2,0);
20422       UInt    rN   = INSN0(5,3);
20423       UInt    imm5 = INSN0(10,6);
20424       UInt    isLD = INSN0(11,11);
20425
20426       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 4));
20427       put_ITSTATE(old_itstate); // backout
20428       if (isLD) {
20429          IRTemp tD = newTemp(Ity_I32);
20430          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20431          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20432       } else {
20433          storeGuardedLE( ea, getIRegT(rD), condT );
20434       }
20435       put_ITSTATE(new_itstate); // restore
20436
20437       DIP("%s r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 4);
20438       goto decode_success;
20439    }
20440
20441    case BITS5(1,0,0,0,0):   /* STRH */
20442    case BITS5(1,0,0,0,1): { /* LDRH */
20443       /* ------------- LDRH Rd, [Rn, #imm5 * 2] ------------- */
20444       /* ------------- STRH Rd, [Rn, #imm5 * 2] ------------- */
20445       /* LDRH/STRH Rd, [Rn + imm5 * 2] */
20446       UInt    rD   = INSN0(2,0);
20447       UInt    rN   = INSN0(5,3);
20448       UInt    imm5 = INSN0(10,6);
20449       UInt    isLD = INSN0(11,11);
20450
20451       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5 * 2));
20452       put_ITSTATE(old_itstate); // backout
20453       if (isLD) {
20454          IRTemp tD = newTemp(Ity_I32);
20455          loadGuardedLE( tD, ILGop_16Uto32, ea, llGetIReg(rD), condT );
20456          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20457       } else {
20458          storeGuardedLE( ea, unop(Iop_32to16, getIRegT(rD)), condT );
20459       }
20460       put_ITSTATE(new_itstate); // restore
20461
20462       DIP("%sh r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5 * 2);
20463       goto decode_success;
20464    }
20465
20466    case BITS5(0,1,1,1,0):   /* STRB */
20467    case BITS5(0,1,1,1,1): { /* LDRB */
20468       /* ------------- LDRB Rd, [Rn, #imm5] ------------- */
20469       /* ------------- STRB Rd, [Rn, #imm5] ------------- */
20470       /* LDRB/STRB Rd, [Rn + imm5] */
20471       UInt    rD   = INSN0(2,0);
20472       UInt    rN   = INSN0(5,3);
20473       UInt    imm5 = INSN0(10,6);
20474       UInt    isLD = INSN0(11,11);
20475
20476       IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm5));
20477       put_ITSTATE(old_itstate); // backout
20478       if (isLD) {
20479          IRTemp tD = newTemp(Ity_I32);
20480          loadGuardedLE( tD, ILGop_8Uto32, ea, llGetIReg(rD), condT );
20481          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20482       } else {
20483          storeGuardedLE( ea, unop(Iop_32to8, getIRegT(rD)), condT );
20484       }
20485       put_ITSTATE(new_itstate); // restore
20486
20487       DIP("%sb r%u, [r%u, #%u]\n", isLD ? "ldr" : "str", rD, rN, imm5);
20488       goto decode_success;
20489    }
20490
20491    case BITS5(1,0,0,1,0):   /* STR */
20492    case BITS5(1,0,0,1,1): { /* LDR */
20493       /* ------------- LDR Rd, [SP, #imm8 * 4] ------------- */
20494       /* ------------- STR Rd, [SP, #imm8 * 4] ------------- */
20495       /* LDR/STR Rd, [SP + imm8 * 4] */
20496       UInt rD    = INSN0(10,8);
20497       UInt imm8  = INSN0(7,0);
20498       UInt isLD  = INSN0(11,11);
20499
20500       IRExpr* ea = binop(Iop_Add32, getIRegT(13), mkU32(imm8 * 4));
20501       put_ITSTATE(old_itstate); // backout
20502       if (isLD) {
20503          IRTemp tD = newTemp(Ity_I32);
20504          loadGuardedLE( tD, ILGop_Ident32, ea, llGetIReg(rD), condT );
20505          putIRegT(rD, mkexpr(tD), IRTemp_INVALID);
20506       } else {
20507          storeGuardedLE(ea, getIRegT(rD), condT);
20508       }
20509       put_ITSTATE(new_itstate); // restore
20510
20511       DIP("%s r%u, [sp, #%u]\n", isLD ? "ldr" : "str", rD, imm8 * 4);
20512       goto decode_success;
20513    }
20514
20515    case BITS5(1,1,0,0,1): {
20516       /* ------------- LDMIA Rn!, {reglist} ------------- */
20517       Int i, nRegs = 0;
20518       UInt rN   = INSN0(10,8);
20519       UInt list = INSN0(7,0);
20520       /* Empty lists aren't allowed. */
20521       if (list != 0) {
20522          mk_skip_over_T16_if_cond_is_false(condT);
20523          condT = IRTemp_INVALID;
20524          put_ITSTATE(old_itstate);
20525          // now uncond
20526
20527          IRTemp oldRn = newTemp(Ity_I32);
20528          IRTemp base  = newTemp(Ity_I32);
20529          assign(oldRn, getIRegT(rN));
20530          assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
20531          for (i = 0; i < 8; i++) {
20532             if (0 == (list & (1 << i)))
20533                continue;
20534             nRegs++;
20535             putIRegT(
20536                i, loadLE(Ity_I32,
20537                          binop(Iop_Add32, mkexpr(base),
20538                                           mkU32(nRegs * 4 - 4))),
20539                IRTemp_INVALID
20540             );
20541          }
20542          /* Only do the writeback for rN if it isn't in the list of
20543             registers to be transferred. */
20544          if (0 == (list & (1 << rN))) {
20545             putIRegT(rN,
20546                      binop(Iop_Add32, mkexpr(oldRn),
20547                                       mkU32(nRegs * 4)),
20548                      IRTemp_INVALID
20549             );
20550          }
20551
20552          /* Reinstate the ITSTATE update. */
20553          put_ITSTATE(new_itstate);
20554
20555          DIP("ldmia r%u!, {0x%04x}\n", rN, list);
20556          goto decode_success;
20557       }
20558       break;
20559    }
20560
20561    case BITS5(1,1,0,0,0): {
20562       /* ------------- STMIA Rn!, {reglist} ------------- */
20563       Int i, nRegs = 0;
20564       UInt rN   = INSN0(10,8);
20565       UInt list = INSN0(7,0);
20566       /* Empty lists aren't allowed.  Also, if rN is in the list then
20567          it must be the lowest numbered register in the list. */
20568       Bool valid = list != 0;
20569       if (valid && 0 != (list & (1 << rN))) {
20570          for (i = 0; i < rN; i++) {
20571             if (0 != (list & (1 << i)))
20572                valid = False;
20573          }
20574       }
20575       if (valid) {
20576          mk_skip_over_T16_if_cond_is_false(condT);
20577          condT = IRTemp_INVALID;
20578          put_ITSTATE(old_itstate);
20579          // now uncond
20580
20581          IRTemp oldRn = newTemp(Ity_I32);
20582          IRTemp base = newTemp(Ity_I32);
20583          assign(oldRn, getIRegT(rN));
20584          assign(base, binop(Iop_And32, mkexpr(oldRn), mkU32(~3U)));
20585          for (i = 0; i < 8; i++) {
20586             if (0 == (list & (1 << i)))
20587                continue;
20588             nRegs++;
20589             storeLE( binop(Iop_Add32, mkexpr(base), mkU32(nRegs * 4 - 4)),
20590                      getIRegT(i) );
20591          }
20592          /* Always do the writeback. */
20593          putIRegT(rN,
20594                   binop(Iop_Add32, mkexpr(oldRn),
20595                                    mkU32(nRegs * 4)),
20596                   IRTemp_INVALID);
20597
20598          /* Reinstate the ITSTATE update. */
20599          put_ITSTATE(new_itstate);
20600
20601          DIP("stmia r%u!, {0x%04x}\n", rN, list);
20602          goto decode_success;
20603       }
20604       break;
20605    }
20606
20607    case BITS5(0,0,0,0,0):   /* LSLS */
20608    case BITS5(0,0,0,0,1):   /* LSRS */
20609    case BITS5(0,0,0,1,0): { /* ASRS */
20610       /* ---------------- LSLS Rd, Rm, #imm5 ---------------- */
20611       /* ---------------- LSRS Rd, Rm, #imm5 ---------------- */
20612       /* ---------------- ASRS Rd, Rm, #imm5 ---------------- */
20613       UInt   rD   = INSN0(2,0);
20614       UInt   rM   = INSN0(5,3);
20615       UInt   imm5 = INSN0(10,6);
20616       IRTemp res  = newTemp(Ity_I32);
20617       IRTemp resC = newTemp(Ity_I32);
20618       IRTemp rMt  = newTemp(Ity_I32);
20619       IRTemp oldV = newTemp(Ity_I32);
20620       const HChar* wot  = "???";
20621       assign(rMt, getIRegT(rM));
20622       assign(oldV, mk_armg_calculate_flag_v());
20623       /* Looks like INSN0(12,11) are the standard 'how' encoding.
20624          Could compactify if the ROR case later appears. */
20625       switch (INSN0(15,11)) {
20626          case BITS5(0,0,0,0,0):
20627             compute_result_and_C_after_LSL_by_imm5(
20628                dis_buf, &res, &resC, rMt, imm5, rM
20629             );
20630             wot = "lsl";
20631             break;
20632          case BITS5(0,0,0,0,1):
20633             compute_result_and_C_after_LSR_by_imm5(
20634                dis_buf, &res, &resC, rMt, imm5, rM
20635             );
20636             wot = "lsr";
20637             break;
20638          case BITS5(0,0,0,1,0):
20639             compute_result_and_C_after_ASR_by_imm5(
20640                dis_buf, &res, &resC, rMt, imm5, rM
20641             );
20642             wot = "asr";
20643             break;
20644          default:
20645             /*NOTREACHED*/vassert(0);
20646       }
20647       // not safe to read guest state after this point
20648       putIRegT(rD, mkexpr(res), condT);
20649       setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, resC, oldV,
20650                          cond_AND_notInIT_T );
20651       /* ignore buf and roll our own output */
20652       DIP("%ss r%u, r%u, #%u\n", wot, rD, rM, imm5);
20653       goto decode_success;
20654    }
20655
20656    case BITS5(1,1,1,0,0): {
20657       /* ---------------- B #simm11 ---------------- */
20658       UInt uimm11 = INSN0(10,0);  uimm11 <<= 21;
20659       Int  simm11 = (Int)uimm11;  simm11 >>= 20;
20660       UInt dst    = simm11 + guest_R15_curr_instr_notENC + 4;
20661       /* Only allowed outside or last-in IT block; SIGILL if not so. */
20662       gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20663       // and skip this insn if not selected; being cleverer is too
20664       // difficult
20665       mk_skip_over_T16_if_cond_is_false(condT);
20666       condT = IRTemp_INVALID;
20667       // now uncond
20668       llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
20669       dres.jk_StopHere = Ijk_Boring;
20670       dres.whatNext    = Dis_StopHere;
20671       DIP("b 0x%x\n", dst);
20672       goto decode_success;
20673    }
20674
20675    default:
20676       break; /* examine the next shortest prefix */
20677
20678    }
20679
20680
20681    /* ================ 16-bit 15:12 cases ================ */
20682
20683    switch (INSN0(15,12)) {
20684
20685    case BITS4(1,1,0,1): {
20686       /* ---------------- Bcond #simm8 ---------------- */
20687       UInt cond  = INSN0(11,8);
20688       UInt uimm8 = INSN0(7,0);  uimm8 <<= 24;
20689       Int  simm8 = (Int)uimm8;  simm8 >>= 23;
20690       UInt dst   = simm8 + guest_R15_curr_instr_notENC + 4;
20691       if (cond != ARMCondAL && cond != ARMCondNV) {
20692          /* Not allowed in an IT block; SIGILL if so. */
20693          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
20694
20695          IRTemp kondT = newTemp(Ity_I32);
20696          assign( kondT, mk_armg_calculate_condition(cond) );
20697          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
20698                             Ijk_Boring,
20699                             IRConst_U32(dst | 1/*CPSR.T*/),
20700                             OFFB_R15T ));
20701          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
20702                               | 1 /*CPSR.T*/ ));
20703          dres.jk_StopHere = Ijk_Boring;
20704          dres.whatNext    = Dis_StopHere;
20705          DIP("b%s 0x%x\n", nCC(cond), dst);
20706          goto decode_success;
20707       }
20708       break;
20709    }
20710
20711    default:
20712       break; /* hmm, nothing matched */
20713
20714    }
20715
20716    /* ================ 16-bit misc cases ================ */
20717
20718    switch (INSN0(15,0)) {
20719       case 0xBF00:
20720          /* ------ NOP ------ */
20721          DIP("nop\n");
20722          goto decode_success;
20723       case 0xBF10: // YIELD
20724       case 0xBF20: // WFE
20725          /* ------ WFE, YIELD ------ */
20726          /* Both appear to get used as a spin-loop hints.  Do the usual thing,
20727             which is to continue after yielding. */
20728          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
20729                             Ijk_Yield,
20730                             IRConst_U32((guest_R15_curr_instr_notENC + 2)
20731                                         | 1 /*CPSR.T*/),
20732                             OFFB_R15T ));
20733          Bool isWFE = INSN0(15,0) == 0xBF20;
20734          DIP(isWFE ? "wfe\n" : "yield\n");
20735          goto decode_success;
20736       case 0xBF40:
20737          /* ------ SEV ------ */
20738          /* Treat this as a no-op.  Any matching WFEs won't really
20739             cause the host CPU to snooze; they just cause V to try to
20740             run some other thread for a while.  So there's no point in
20741             really doing anything for SEV. */
20742          DIP("sev\n");
20743          goto decode_success;
20744       default:
20745          break; /* fall through */
20746    }
20747
20748    /* ----------------------------------------------------------- */
20749    /* --                                                       -- */
20750    /* -- Thumb 32-bit integer instructions                     -- */
20751    /* --                                                       -- */
20752    /* ----------------------------------------------------------- */
20753
20754 #  define INSN1(_bMax,_bMin)  SLICE_UInt(((UInt)insn1), (_bMax), (_bMin))
20755
20756    /* second 16 bits of the instruction, if any */
20757    vassert(insn1 == 0);
20758    insn1 = getUShortLittleEndianly( guest_instr+2 );
20759
20760    anOp   = Iop_INVALID; /* paranoia */
20761    anOpNm = NULL;        /* paranoia */
20762
20763    /* Change result defaults to suit 32-bit insns. */
20764    vassert(dres.whatNext   == Dis_Continue);
20765    vassert(dres.len        == 2);
20766    vassert(dres.continueAt == 0);
20767    dres.len = 4;
20768
20769    /* ---------------- BL/BLX simm26 ---------------- */
20770    if (BITS5(1,1,1,1,0) == INSN0(15,11) && BITS2(1,1) == INSN1(15,14)) {
20771       UInt isBL = INSN1(12,12);
20772       UInt bS   = INSN0(10,10);
20773       UInt bJ1  = INSN1(13,13);
20774       UInt bJ2  = INSN1(11,11);
20775       UInt bI1  = 1 ^ (bJ1 ^ bS);
20776       UInt bI2  = 1 ^ (bJ2 ^ bS);
20777       UInt uimm25
20778          =   (bS          << (1 + 1 + 10 + 11 + 1))
20779            | (bI1         << (1 + 10 + 11 + 1))
20780            | (bI2         << (10 + 11 + 1))
20781            | (INSN0(9,0)  << (11 + 1))
20782            | (INSN1(10,0) << 1);
20783       uimm25 <<= 7;
20784       Int simm25 = (Int)uimm25;
20785       simm25 >>= 7;
20786
20787       vassert(0 == (guest_R15_curr_instr_notENC & 1));
20788       UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
20789
20790       /* One further validity case to check: in the case of BLX
20791          (not-BL), that insn1[0] must be zero. */
20792       Bool valid = True;
20793       if (isBL == 0 && INSN1(0,0) == 1) valid = False;
20794       if (valid) {
20795          /* Only allowed outside or last-in IT block; SIGILL if not so. */
20796          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20797          // and skip this insn if not selected; being cleverer is too
20798          // difficult
20799          mk_skip_over_T32_if_cond_is_false(condT);
20800          condT = IRTemp_INVALID;
20801          // now uncond
20802
20803          /* We're returning to Thumb code, hence "| 1" */
20804          putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 4) | 1 ),
20805                    IRTemp_INVALID);
20806          if (isBL) {
20807             /* BL: unconditional T -> T call */
20808             /* we're calling Thumb code, hence "| 1" */
20809             llPutIReg(15, mkU32( dst | 1 ));
20810             DIP("bl 0x%x (stay in Thumb mode)\n", dst);
20811          } else {
20812             /* BLX: unconditional T -> A call */
20813             /* we're calling ARM code, hence "& 3" to align to a
20814                valid ARM insn address */
20815             llPutIReg(15, mkU32( dst & ~3 ));
20816             DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
20817          }
20818          dres.whatNext    = Dis_StopHere;
20819          dres.jk_StopHere = Ijk_Call;
20820          goto decode_success;
20821       }
20822    }
20823
20824    /* ---------------- {LD,ST}M{IA,DB} ---------------- */
20825    if (0x3a2 == INSN0(15,6) // {LD,ST}MIA
20826        || 0x3a4 == INSN0(15,6)) { // {LD,ST}MDB
20827       UInt bW      = INSN0(5,5); /* writeback Rn ? */
20828       UInt bL      = INSN0(4,4);
20829       UInt rN      = INSN0(3,0);
20830       UInt bP      = INSN1(15,15); /* reglist entry for r15 */
20831       UInt bM      = INSN1(14,14); /* reglist entry for r14 */
20832       UInt rLmost  = INSN1(12,0);  /* reglist entry for r0 .. 12 */
20833       UInt rL13    = INSN1(13,13); /* must be zero */
20834       UInt regList = 0;
20835       Bool valid   = True;
20836
20837       UInt bINC    = 1;
20838       UInt bBEFORE = 0;
20839       if (INSN0(15,6) == 0x3a4) {
20840          bINC    = 0;
20841          bBEFORE = 1;
20842       }
20843
20844       /* detect statically invalid cases, and construct the final
20845          reglist */
20846       if (rL13 == 1)
20847          valid = False;
20848
20849       if (bL == 1) {
20850          regList = (bP << 15) | (bM << 14) | rLmost;
20851          if (rN == 15)                       valid = False;
20852          if (popcount32(regList) < 2)        valid = False;
20853          if (bP == 1 && bM == 1)             valid = False;
20854          if (bW == 1 && (regList & (1<<rN))) valid = False;
20855       } else {
20856          regList = (bM << 14) | rLmost;
20857          if (bP == 1)                        valid = False;
20858          if (rN == 15)                       valid = False;
20859          if (popcount32(regList) < 2)        valid = False;
20860          if (bW == 1 && (regList & (1<<rN))) valid = False;
20861       }
20862
20863       if (valid) {
20864          if (bL == 1 && bP == 1) {
20865             // We'll be writing the PC.  Hence:
20866             /* Only allowed outside or last-in IT block; SIGILL if not so. */
20867             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
20868          }
20869
20870          /* Go uncond: */
20871          mk_skip_over_T32_if_cond_is_false(condT);
20872          condT = IRTemp_INVALID;
20873          // now uncond
20874
20875          /* Generate the IR.  This might generate a write to R15. */
20876          mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
20877
20878          if (bL == 1 && (regList & (1<<15))) {
20879             // If we wrote to R15, we have an interworking return to
20880             // deal with.
20881             llPutIReg(15, llGetIReg(15));
20882             dres.jk_StopHere = Ijk_Ret;
20883             dres.whatNext    = Dis_StopHere;
20884          }
20885
20886          DIP("%sm%c%c r%u%s, {0x%04x}\n",
20887               bL == 1 ? "ld" : "st", bINC ? 'i' : 'd', bBEFORE ? 'b' : 'a',
20888               rN, bW ? "!" : "", regList);
20889
20890          goto decode_success;
20891       }
20892    }
20893
20894    /* -------------- (T3) ADD{S}.W Rd, Rn, #constT -------------- */
20895    if (INSN0(15,11) == BITS5(1,1,1,1,0)
20896        && INSN0(9,5) == BITS5(0,1,0,0,0)
20897        && INSN1(15,15) == 0) {
20898       UInt bS = INSN0(4,4);
20899       UInt rN = INSN0(3,0);
20900       UInt rD = INSN1(11,8);
20901       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20902       /* but allow "add.w reg, sp, #constT" for reg != PC */
20903       if (!valid && rD <= 14 && rN == 13)
20904          valid = True;
20905       if (valid) {
20906          IRTemp argL  = newTemp(Ity_I32);
20907          IRTemp argR  = newTemp(Ity_I32);
20908          IRTemp res   = newTemp(Ity_I32);
20909          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20910          assign(argL, getIRegT(rN));
20911          assign(argR, mkU32(imm32));
20912          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
20913          putIRegT(rD, mkexpr(res), condT);
20914          if (bS == 1)
20915             setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
20916          DIP("add%s.w r%u, r%u, #%u\n",
20917              bS == 1 ? "s" : "", rD, rN, imm32);
20918          goto decode_success;
20919       }
20920    }
20921
20922    /* ---------------- (T4) ADDW Rd, Rn, #uimm12 -------------- */
20923    if (INSN0(15,11) == BITS5(1,1,1,1,0)
20924        && INSN0(9,4) == BITS6(1,0,0,0,0,0)
20925        && INSN1(15,15) == 0) {
20926       UInt rN = INSN0(3,0);
20927       UInt rD = INSN1(11,8);
20928       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
20929       /* but allow "addw reg, sp, #uimm12" for reg != PC */
20930       if (!valid && rD <= 14 && rN == 13)
20931          valid = True;
20932       if (valid) {
20933          IRTemp argL = newTemp(Ity_I32);
20934          IRTemp argR = newTemp(Ity_I32);
20935          IRTemp res  = newTemp(Ity_I32);
20936          UInt imm12  = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
20937          assign(argL, getIRegT(rN));
20938          assign(argR, mkU32(imm12));
20939          assign(res,  binop(Iop_Add32, mkexpr(argL), mkexpr(argR)));
20940          putIRegT(rD, mkexpr(res), condT);
20941          DIP("addw r%u, r%u, #%u\n", rD, rN, imm12);
20942          goto decode_success;
20943       }
20944    }
20945
20946    /* ---------------- (T2) CMP.W Rn, #constT ---------------- */
20947    /* ---------------- (T2) CMN.W Rn, #constT ---------------- */
20948    if (INSN0(15,11) == BITS5(1,1,1,1,0)
20949        && (   INSN0(9,4) == BITS6(0,1,1,0,1,1)  // CMP
20950            || INSN0(9,4) == BITS6(0,1,0,0,0,1)) // CMN
20951        && INSN1(15,15) == 0
20952        && INSN1(11,8) == BITS4(1,1,1,1)) {
20953       UInt rN = INSN0(3,0);
20954       if (rN != 15) {
20955          IRTemp argL  = newTemp(Ity_I32);
20956          IRTemp argR  = newTemp(Ity_I32);
20957          Bool   isCMN = INSN0(9,4) == BITS6(0,1,0,0,0,1);
20958          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
20959          assign(argL, getIRegT(rN));
20960          assign(argR, mkU32(imm32));
20961          setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
20962                          argL, argR, condT );
20963          DIP("%s.w r%u, #%u\n", isCMN ? "cmn" : "cmp", rN, imm32);
20964          goto decode_success;
20965       }
20966    }
20967
20968    /* -------------- (T1) TST.W Rn, #constT -------------- */
20969    /* -------------- (T1) TEQ.W Rn, #constT -------------- */
20970    if (INSN0(15,11) == BITS5(1,1,1,1,0)
20971        && (   INSN0(9,4) == BITS6(0,0,0,0,0,1)  // TST
20972            || INSN0(9,4) == BITS6(0,0,1,0,0,1)) // TEQ
20973        && INSN1(15,15) == 0
20974        && INSN1(11,8) == BITS4(1,1,1,1)) {
20975       UInt rN = INSN0(3,0);
20976       if (!isBadRegT(rN)) { // yes, really, it's inconsistent with CMP.W
20977          Bool  isTST  = INSN0(9,4) == BITS6(0,0,0,0,0,1);
20978          IRTemp argL  = newTemp(Ity_I32);
20979          IRTemp argR  = newTemp(Ity_I32);
20980          IRTemp res   = newTemp(Ity_I32);
20981          IRTemp oldV  = newTemp(Ity_I32);
20982          IRTemp oldC  = newTemp(Ity_I32);
20983          Bool   updC  = False;
20984          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
20985          assign(argL, getIRegT(rN));
20986          assign(argR, mkU32(imm32));
20987          assign(res,  binop(isTST ? Iop_And32 : Iop_Xor32,
20988                             mkexpr(argL), mkexpr(argR)));
20989          assign( oldV, mk_armg_calculate_flag_v() );
20990          assign( oldC, updC
20991                        ? mkU32((imm32 >> 31) & 1)
20992                        : mk_armg_calculate_flag_c() );
20993          setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT );
20994          DIP("%s.w r%u, #%u\n", isTST ? "tst" : "teq", rN, imm32);
20995          goto decode_success;
20996       }
20997    }
20998
20999    /* -------------- (T3) SUB{S}.W Rd, Rn, #constT -------------- */
21000    /* -------------- (T3) RSB{S}.W Rd, Rn, #constT -------------- */
21001    if (INSN0(15,11) == BITS5(1,1,1,1,0)
21002        && (INSN0(9,5) == BITS5(0,1,1,0,1) // SUB
21003            || INSN0(9,5) == BITS5(0,1,1,1,0)) // RSB
21004        && INSN1(15,15) == 0) {
21005       Bool isRSB = INSN0(9,5) == BITS5(0,1,1,1,0);
21006       UInt bS    = INSN0(4,4);
21007       UInt rN    = INSN0(3,0);
21008       UInt rD    = INSN1(11,8);
21009       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
21010       /* but allow "sub{s}.w reg, sp, #constT
21011          this is (T2) of "SUB (SP minus immediate)" */
21012       if (!valid && !isRSB && rN == 13 && rD != 15)
21013          valid = True;
21014       if (valid) {
21015          IRTemp argL  = newTemp(Ity_I32);
21016          IRTemp argR  = newTemp(Ity_I32);
21017          IRTemp res   = newTemp(Ity_I32);
21018          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
21019          assign(argL, getIRegT(rN));
21020          assign(argR, mkU32(imm32));
21021          assign(res,  isRSB
21022                       ? binop(Iop_Sub32, mkexpr(argR), mkexpr(argL))
21023                       : binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
21024          putIRegT(rD, mkexpr(res), condT);
21025          if (bS == 1) {
21026             if (isRSB)
21027                setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
21028             else
21029                setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
21030          }
21031          DIP("%s%s.w r%u, r%u, #%u\n",
21032              isRSB ? "rsb" : "sub", bS == 1 ? "s" : "", rD, rN, imm32);
21033          goto decode_success;
21034       }
21035    }
21036
21037    /* -------------- (T4) SUBW Rd, Rn, #uimm12 ------------------- */
21038    if (INSN0(15,11) == BITS5(1,1,1,1,0)
21039        && INSN0(9,4) == BITS6(1,0,1,0,1,0)
21040        && INSN1(15,15) == 0) {
21041       UInt rN = INSN0(3,0);
21042       UInt rD = INSN1(11,8);
21043       Bool valid = !isBadRegT(rN) && !isBadRegT(rD);
21044       /* but allow "subw sp, sp, #uimm12" */
21045       if (!valid && rD == 13 && rN == 13)
21046          valid = True;
21047       if (valid) {
21048          IRTemp argL  = newTemp(Ity_I32);
21049          IRTemp argR  = newTemp(Ity_I32);
21050          IRTemp res   = newTemp(Ity_I32);
21051          UInt imm12   = (INSN0(10,10) << 11) | (INSN1(14,12) << 8) | INSN1(7,0);
21052          assign(argL, getIRegT(rN));
21053          assign(argR, mkU32(imm12));
21054          assign(res,  binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)));
21055          putIRegT(rD, mkexpr(res), condT);
21056          DIP("subw r%u, r%u, #%u\n", rD, rN, imm12);
21057          goto decode_success;
21058       }
21059    }
21060
21061    /* -------------- (T1) ADC{S}.W Rd, Rn, #constT -------------- */
21062    /* -------------- (T1) SBC{S}.W Rd, Rn, #constT -------------- */
21063    if (INSN0(15,11) == BITS5(1,1,1,1,0)
21064        && (   INSN0(9,5) == BITS5(0,1,0,1,0)  // ADC
21065            || INSN0(9,5) == BITS5(0,1,0,1,1)) // SBC
21066        && INSN1(15,15) == 0) {
21067       /* ADC:  Rd = Rn + constT + oldC */
21068       /* SBC:  Rd = Rn - constT - (oldC ^ 1) */
21069       UInt bS    = INSN0(4,4);
21070       UInt rN    = INSN0(3,0);
21071       UInt rD    = INSN1(11,8);
21072       if (!isBadRegT(rN) && !isBadRegT(rD)) {
21073          IRTemp argL  = newTemp(Ity_I32);
21074          IRTemp argR  = newTemp(Ity_I32);
21075          IRTemp res   = newTemp(Ity_I32);
21076          IRTemp oldC  = newTemp(Ity_I32);
21077          UInt   imm32 = thumbExpandImm_from_I0_I1(NULL, insn0, insn1);
21078          assign(argL, getIRegT(rN));
21079          assign(argR, mkU32(imm32));
21080          assign(oldC, mk_armg_calculate_flag_c() );
21081          const HChar* nm  = "???";
21082          switch (INSN0(9,5)) {
21083             case BITS5(0,1,0,1,0): // ADC
21084                nm = "adc";
21085                assign(res,
21086                       binop(Iop_Add32,
21087                             binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
21088                             mkexpr(oldC) ));
21089                putIRegT(rD, mkexpr(res), condT);
21090                if (bS)
21091                   setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
21092                                      argL, argR, oldC, condT );
21093                break;
21094             case BITS5(0,1,0,1,1): // SBC
21095                nm = "sbc";
21096                assign(res,
21097                       binop(Iop_Sub32,
21098                             binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
21099                             binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
21100                putIRegT(rD, mkexpr(res), condT);
21101                if (bS)
21102                   setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
21103                                      argL, argR, oldC, condT );
21104                break;
21105             default:
21106               vassert(0);
21107          }
21108          DIP("%s%s.w r%u, r%u, #%u\n",
21109              nm, bS == 1 ? "s" : "", rD, rN, imm32);
21110          goto decode_success;
21111       }
21112    }
21113
21114    /* -------------- (T1) ORR{S}.W Rd, Rn, #constT -------------- */
21115    /* -------------- (T1) AND{S}.W Rd, Rn, #constT -------------- */
21116    /* -------------- (T1) BIC{S}.W Rd, Rn, #constT -------------- */
21117    /* -------------- (T1) EOR{S}.W Rd, Rn, #constT -------------- */
21118    if (INSN0(15,11) == BITS5(1,1,1,1,0)
21119        && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // ORR
21120            || INSN0(9,5) == BITS5(0,0,0,0,0)  // AND
21121            || INSN0(9,5) == BITS5(0,0,0,0,1)  // BIC
21122            || INSN0(9,5) == BITS5(0,0,1,0,0)  // EOR
21123            || INSN0(9,5) == BITS5(0,0,0,1,1)) // ORN
21124        && INSN1(15,15) == 0) {
21125       UInt bS = INSN0(4,4);
21126       UInt rN = INSN0(3,0);
21127       UInt rD = INSN1(11,8);
21128       if (!isBadRegT(rN) && !isBadRegT(rD)) {
21129          Bool   notArgR = False;
21130          IROp   op      = Iop_INVALID;
21131          const HChar* nm = "???";
21132          switch (INSN0(9,5)) {
21133             case BITS5(0,0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
21134             case BITS5(0,0,0,0,0): op = Iop_And32; nm = "and"; break;
21135             case BITS5(0,0,0,0,1): op = Iop_And32; nm = "bic";
21136                                    notArgR = True; break;
21137             case BITS5(0,0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
21138             case BITS5(0,0,0,1,1): op = Iop_Or32;  nm = "orn";
21139                                    notArgR = True; break;
21140             default: vassert(0);
21141          }
21142          IRTemp argL  = newTemp(Ity_I32);
21143          IRTemp argR  = newTemp(Ity_I32);
21144          IRTemp res   = newTemp(Ity_I32);
21145          Bool   updC  = False;
21146          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
21147          assign(argL, getIRegT(rN));
21148          assign(argR, mkU32(notArgR ? ~imm32 : imm32));
21149          assign(res,  binop(op, mkexpr(argL), mkexpr(argR)));
21150          putIRegT(rD, mkexpr(res), condT);
21151          if (bS) {
21152             IRTemp oldV = newTemp(Ity_I32);
21153             IRTemp oldC = newTemp(Ity_I32);
21154             assign( oldV, mk_armg_calculate_flag_v() );
21155             assign( oldC, updC
21156                           ? mkU32((imm32 >> 31) & 1)
21157                           : mk_armg_calculate_flag_c() );
21158             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21159                                condT );
21160          }
21161          DIP("%s%s.w r%u, r%u, #%u\n",
21162              nm, bS == 1 ? "s" : "", rD, rN, imm32);
21163          goto decode_success;
21164       }
21165    }
21166
21167    /* ---------- (T3) ADD{S}.W Rd, Rn, Rm, {shift} ---------- */
21168    /* ---------- (T3) SUB{S}.W Rd, Rn, Rm, {shift} ---------- */
21169    /* ---------- (T3) RSB{S}.W Rd, Rn, Rm, {shift} ---------- */
21170    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21171        && (   INSN0(8,5) == BITS4(1,0,0,0)  // add subopc
21172            || INSN0(8,5) == BITS4(1,1,0,1)  // sub subopc
21173            || INSN0(8,5) == BITS4(1,1,1,0)) // rsb subopc
21174        && INSN1(15,15) == 0) {
21175       UInt rN   = INSN0(3,0);
21176       UInt rD   = INSN1(11,8);
21177       UInt rM   = INSN1(3,0);
21178       UInt bS   = INSN0(4,4);
21179       UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21180       UInt how  = INSN1(5,4);
21181
21182       Bool valid = !isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM);
21183       /* but allow "add.w reg, sp, reg, lsl #N for N=0..31
21184          (T3) "ADD (SP plus register) */
21185       if (!valid && INSN0(8,5) == BITS4(1,0,0,0) // add
21186           && rD != 15 && rN == 13 && imm5 <= 31 && how == 0) {
21187          valid = True;
21188       }
21189       /* also allow "sub.w reg, sp, reg   lsl #N for N=0 .. 5
21190          (T1) "SUB (SP minus register) */
21191       if (!valid && INSN0(8,5) == BITS4(1,1,0,1) // sub
21192           && rD != 15 && rN == 13 && imm5 <= 5 && how == 0) {
21193          valid = True;
21194       }
21195       if (valid) {
21196          Bool   swap = False;
21197          IROp   op   = Iop_INVALID;
21198          const HChar* nm = "???";
21199          switch (INSN0(8,5)) {
21200             case BITS4(1,0,0,0): op = Iop_Add32; nm = "add"; break;
21201             case BITS4(1,1,0,1): op = Iop_Sub32; nm = "sub"; break;
21202             case BITS4(1,1,1,0): op = Iop_Sub32; nm = "rsb";
21203                                  swap = True; break;
21204             default: vassert(0);
21205          }
21206
21207          IRTemp argL = newTemp(Ity_I32);
21208          assign(argL, getIRegT(rN));
21209
21210          IRTemp rMt = newTemp(Ity_I32);
21211          assign(rMt, getIRegT(rM));
21212
21213          IRTemp argR = newTemp(Ity_I32);
21214          compute_result_and_C_after_shift_by_imm5(
21215             dis_buf, &argR, NULL, rMt, how, imm5, rM
21216          );
21217
21218          IRTemp res = newTemp(Ity_I32);
21219          assign(res, swap
21220                      ? binop(op, mkexpr(argR), mkexpr(argL))
21221                      : binop(op, mkexpr(argL), mkexpr(argR)));
21222
21223          putIRegT(rD, mkexpr(res), condT);
21224          if (bS) {
21225             switch (op) {
21226                case Iop_Add32:
21227                   setFlags_D1_D2( ARMG_CC_OP_ADD, argL, argR, condT );
21228                   break;
21229                case Iop_Sub32:
21230                   if (swap)
21231                      setFlags_D1_D2( ARMG_CC_OP_SUB, argR, argL, condT );
21232                   else
21233                      setFlags_D1_D2( ARMG_CC_OP_SUB, argL, argR, condT );
21234                   break;
21235                default:
21236                   vassert(0);
21237             }
21238          }
21239
21240          DIP("%s%s.w r%u, r%u, %s\n",
21241              nm, bS ? "s" : "", rD, rN, dis_buf);
21242          goto decode_success;
21243       }
21244    }
21245
21246    /* ---------- (T3) ADC{S}.W Rd, Rn, Rm, {shift} ---------- */
21247    /* ---------- (T2) SBC{S}.W Rd, Rn, Rm, {shift} ---------- */
21248    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21249        && (   INSN0(8,5) == BITS4(1,0,1,0)   // adc subopc
21250            || INSN0(8,5) == BITS4(1,0,1,1))  // sbc subopc
21251        && INSN1(15,15) == 0) {
21252       /* ADC:  Rd = Rn + shifter_operand + oldC */
21253       /* SBC:  Rd = Rn - shifter_operand - (oldC ^ 1) */
21254       UInt rN = INSN0(3,0);
21255       UInt rD = INSN1(11,8);
21256       UInt rM = INSN1(3,0);
21257       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21258          UInt bS   = INSN0(4,4);
21259          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21260          UInt how  = INSN1(5,4);
21261
21262          IRTemp argL = newTemp(Ity_I32);
21263          assign(argL, getIRegT(rN));
21264
21265          IRTemp rMt = newTemp(Ity_I32);
21266          assign(rMt, getIRegT(rM));
21267
21268          IRTemp oldC = newTemp(Ity_I32);
21269          assign(oldC, mk_armg_calculate_flag_c());
21270
21271          IRTemp argR = newTemp(Ity_I32);
21272          compute_result_and_C_after_shift_by_imm5(
21273             dis_buf, &argR, NULL, rMt, how, imm5, rM
21274          );
21275
21276          const HChar* nm  = "???";
21277          IRTemp res = newTemp(Ity_I32);
21278          switch (INSN0(8,5)) {
21279             case BITS4(1,0,1,0): // ADC
21280                nm = "adc";
21281                assign(res,
21282                       binop(Iop_Add32,
21283                             binop(Iop_Add32, mkexpr(argL), mkexpr(argR)),
21284                             mkexpr(oldC) ));
21285                putIRegT(rD, mkexpr(res), condT);
21286                if (bS)
21287                   setFlags_D1_D2_ND( ARMG_CC_OP_ADC,
21288                                      argL, argR, oldC, condT );
21289                break;
21290             case BITS4(1,0,1,1): // SBC
21291                nm = "sbc";
21292                assign(res,
21293                       binop(Iop_Sub32,
21294                             binop(Iop_Sub32, mkexpr(argL), mkexpr(argR)),
21295                             binop(Iop_Xor32, mkexpr(oldC), mkU32(1)) ));
21296                putIRegT(rD, mkexpr(res), condT);
21297                if (bS)
21298                   setFlags_D1_D2_ND( ARMG_CC_OP_SBB,
21299                                      argL, argR, oldC, condT );
21300                break;
21301             default:
21302                vassert(0);
21303          }
21304
21305          DIP("%s%s.w r%u, r%u, %s\n",
21306              nm, bS ? "s" : "", rD, rN, dis_buf);
21307          goto decode_success;
21308       }
21309    }
21310
21311    /* ---------- (T3) AND{S}.W Rd, Rn, Rm, {shift} ---------- */
21312    /* ---------- (T3) ORR{S}.W Rd, Rn, Rm, {shift} ---------- */
21313    /* ---------- (T3) EOR{S}.W Rd, Rn, Rm, {shift} ---------- */
21314    /* ---------- (T3) BIC{S}.W Rd, Rn, Rm, {shift} ---------- */
21315    /* ---------- (T1) ORN{S}.W Rd, Rn, Rm, {shift} ---------- */
21316    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21317        && (   INSN0(8,5) == BITS4(0,0,0,0)  // and subopc
21318            || INSN0(8,5) == BITS4(0,0,1,0)  // orr subopc
21319            || INSN0(8,5) == BITS4(0,1,0,0)  // eor subopc
21320            || INSN0(8,5) == BITS4(0,0,0,1)  // bic subopc
21321            || INSN0(8,5) == BITS4(0,0,1,1)) // orn subopc
21322        && INSN1(15,15) == 0) {
21323       UInt rN = INSN0(3,0);
21324       UInt rD = INSN1(11,8);
21325       UInt rM = INSN1(3,0);
21326       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
21327          Bool notArgR = False;
21328          IROp op      = Iop_INVALID;
21329          const HChar* nm  = "???";
21330          switch (INSN0(8,5)) {
21331             case BITS4(0,0,0,0): op = Iop_And32; nm = "and"; break;
21332             case BITS4(0,0,1,0): op = Iop_Or32;  nm = "orr"; break;
21333             case BITS4(0,1,0,0): op = Iop_Xor32; nm = "eor"; break;
21334             case BITS4(0,0,0,1): op = Iop_And32; nm = "bic";
21335                                  notArgR = True; break;
21336             case BITS4(0,0,1,1): op = Iop_Or32; nm = "orn";
21337                                  notArgR = True; break;
21338             default: vassert(0);
21339          }
21340          UInt bS   = INSN0(4,4);
21341          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21342          UInt how  = INSN1(5,4);
21343
21344          IRTemp rNt = newTemp(Ity_I32);
21345          assign(rNt, getIRegT(rN));
21346
21347          IRTemp rMt = newTemp(Ity_I32);
21348          assign(rMt, getIRegT(rM));
21349
21350          IRTemp argR = newTemp(Ity_I32);
21351          IRTemp oldC = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21352
21353          compute_result_and_C_after_shift_by_imm5(
21354             dis_buf, &argR, bS ? &oldC : NULL, rMt, how, imm5, rM
21355          );
21356
21357          IRTemp res = newTemp(Ity_I32);
21358          if (notArgR) {
21359             vassert(op == Iop_And32 || op == Iop_Or32);
21360             assign(res, binop(op, mkexpr(rNt),
21361                                   unop(Iop_Not32, mkexpr(argR))));
21362          } else {
21363             assign(res, binop(op, mkexpr(rNt), mkexpr(argR)));
21364          }
21365
21366          putIRegT(rD, mkexpr(res), condT);
21367          if (bS) {
21368             IRTemp oldV = newTemp(Ity_I32);
21369             assign( oldV, mk_armg_calculate_flag_v() );
21370             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21371                                condT );
21372          }
21373
21374          DIP("%s%s.w r%u, r%u, %s\n",
21375              nm, bS ? "s" : "", rD, rN, dis_buf);
21376          goto decode_success;
21377       }
21378    }
21379
21380    /* -------------- (T?) LSL{S}.W Rd, Rn, Rm -------------- */
21381    /* -------------- (T?) LSR{S}.W Rd, Rn, Rm -------------- */
21382    /* -------------- (T?) ASR{S}.W Rd, Rn, Rm -------------- */
21383    /* -------------- (T?) ROR{S}.W Rd, Rn, Rm -------------- */
21384    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,0,0)
21385        && INSN1(15,12) == BITS4(1,1,1,1)
21386        && INSN1(7,4) == BITS4(0,0,0,0)) {
21387       UInt how = INSN0(6,5); // standard encoding
21388       UInt rN  = INSN0(3,0);
21389       UInt rD  = INSN1(11,8);
21390       UInt rM  = INSN1(3,0);
21391       UInt bS  = INSN0(4,4);
21392       Bool valid = !isBadRegT(rN) && !isBadRegT(rM) && !isBadRegT(rD);
21393       if (valid) {
21394          IRTemp rNt    = newTemp(Ity_I32);
21395          IRTemp rMt    = newTemp(Ity_I32);
21396          IRTemp res    = newTemp(Ity_I32);
21397          IRTemp oldC   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21398          IRTemp oldV   = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21399          const HChar* nms[4] = { "lsl", "lsr", "asr", "ror" };
21400          const HChar* nm     = nms[how];
21401          assign(rNt, getIRegT(rN));
21402          assign(rMt, getIRegT(rM));
21403          compute_result_and_C_after_shift_by_reg(
21404             dis_buf, &res, bS ? &oldC : NULL,
21405             rNt, how, rMt, rN, rM
21406          );
21407          if (bS)
21408             assign(oldV, mk_armg_calculate_flag_v());
21409          putIRegT(rD, mkexpr(res), condT);
21410          if (bS) {
21411             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21412                                condT );
21413          }
21414          DIP("%s%s.w r%u, r%u, r%u\n",
21415              nm, bS ? "s" : "", rD, rN, rM);
21416          goto decode_success;
21417       }
21418    }
21419
21420    /* ------------ (T?) MOV{S}.W Rd, Rn, {shift} ------------ */
21421    /* ------------ (T?) MVN{S}.W Rd, Rn, {shift} ------------ */
21422    if ((INSN0(15,0) & 0xFFCF) == 0xEA4F
21423        && INSN1(15,15) == 0) {
21424       UInt rD      = INSN1(11,8);
21425       UInt rN      = INSN1(3,0);
21426       UInt bS      = INSN0(4,4);
21427       UInt isMVN   = INSN0(5,5);
21428       Bool regsOK  = (bS || isMVN)
21429                         ? (!isBadRegT(rD) && !isBadRegT(rN))
21430                         : (rD != 15 && rN != 15 && (rD != 13 || rN != 13));
21431       if (regsOK) {
21432          UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
21433          UInt how   = INSN1(5,4);
21434
21435          IRTemp rNt = newTemp(Ity_I32);
21436          assign(rNt, getIRegT(rN));
21437
21438          IRTemp oldRn = newTemp(Ity_I32);
21439          IRTemp oldC  = bS ? newTemp(Ity_I32) : IRTemp_INVALID;
21440          compute_result_and_C_after_shift_by_imm5(
21441             dis_buf, &oldRn, bS ? &oldC : NULL, rNt, how, imm5, rN
21442          );
21443
21444          IRTemp res = newTemp(Ity_I32);
21445          assign(res, isMVN ? unop(Iop_Not32, mkexpr(oldRn))
21446                            : mkexpr(oldRn));
21447
21448          putIRegT(rD, mkexpr(res), condT);
21449          if (bS) {
21450             IRTemp oldV = newTemp(Ity_I32);
21451             assign( oldV, mk_armg_calculate_flag_v() );
21452             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV, condT);
21453          }
21454          DIP("%s%s.w r%u, %s\n",
21455              isMVN ? "mvn" : "mov", bS ? "s" : "", rD, dis_buf);
21456          goto decode_success;
21457       }
21458    }
21459
21460    /* -------------- (T?) TST.W Rn, Rm, {shift} -------------- */
21461    /* -------------- (T?) TEQ.W Rn, Rm, {shift} -------------- */
21462    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21463        && (   INSN0(8,4) == BITS5(0,0,0,0,1)  // TST
21464            || INSN0(8,4) == BITS5(0,1,0,0,1)) // TEQ
21465        && INSN1(15,15) == 0
21466        && INSN1(11,8) == BITS4(1,1,1,1)) {
21467       UInt rN = INSN0(3,0);
21468       UInt rM = INSN1(3,0);
21469       if (!isBadRegT(rN) && !isBadRegT(rM)) {
21470          Bool isTST = INSN0(8,4) == BITS5(0,0,0,0,1);
21471
21472          UInt how  = INSN1(5,4);
21473          UInt imm5 = (INSN1(14,12) << 2) | INSN1(7,6);
21474
21475          IRTemp argL = newTemp(Ity_I32);
21476          assign(argL, getIRegT(rN));
21477
21478          IRTemp rMt = newTemp(Ity_I32);
21479          assign(rMt, getIRegT(rM));
21480
21481          IRTemp argR = newTemp(Ity_I32);
21482          IRTemp oldC = newTemp(Ity_I32);
21483          compute_result_and_C_after_shift_by_imm5(
21484             dis_buf, &argR, &oldC, rMt, how, imm5, rM
21485          );
21486
21487          IRTemp oldV = newTemp(Ity_I32);
21488          assign( oldV, mk_armg_calculate_flag_v() );
21489
21490          IRTemp res = newTemp(Ity_I32);
21491          assign(res, binop(isTST ? Iop_And32 : Iop_Xor32,
21492                            mkexpr(argL), mkexpr(argR)));
21493
21494          setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21495                             condT );
21496          DIP("%s.w r%u, %s\n", isTST ? "tst" : "teq", rN, dis_buf);
21497          goto decode_success;
21498       }
21499    }
21500
21501    /* -------------- (T3) CMP.W Rn, Rm, {shift} -------------- */
21502    /* -------------- (T2) CMN.W Rn, Rm, {shift} -------------- */
21503    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,1)
21504        && (   INSN0(8,4) == BITS5(1,1,0,1,1)  // CMP
21505            || INSN0(8,4) == BITS5(1,0,0,0,1)) // CMN
21506        && INSN1(15,15) == 0
21507        && INSN1(11,8) == BITS4(1,1,1,1)) {
21508       UInt rN = INSN0(3,0);
21509       UInt rM = INSN1(3,0);
21510       if (!isBadRegT(rN) && !isBadRegT(rM)) {
21511          Bool isCMN = INSN0(8,4) == BITS5(1,0,0,0,1);
21512          UInt how   = INSN1(5,4);
21513          UInt imm5  = (INSN1(14,12) << 2) | INSN1(7,6);
21514
21515          IRTemp argL = newTemp(Ity_I32);
21516          assign(argL, getIRegT(rN));
21517
21518          IRTemp rMt = newTemp(Ity_I32);
21519          assign(rMt, getIRegT(rM));
21520
21521          IRTemp argR = newTemp(Ity_I32);
21522          compute_result_and_C_after_shift_by_imm5(
21523             dis_buf, &argR, NULL, rMt, how, imm5, rM
21524          );
21525
21526          setFlags_D1_D2( isCMN ? ARMG_CC_OP_ADD : ARMG_CC_OP_SUB,
21527                          argL, argR, condT );
21528
21529          DIP("%s.w r%u, %s\n", isCMN ? "cmn" : "cmp", rN, dis_buf);
21530          goto decode_success;
21531       }
21532    }
21533
21534    /* -------------- (T2) MOV{S}.W Rd, #constT -------------- */
21535    /* -------------- (T2) MVN{S}.W Rd, #constT -------------- */
21536    if (INSN0(15,11) == BITS5(1,1,1,1,0)
21537        && (   INSN0(9,5) == BITS5(0,0,0,1,0)  // MOV
21538            || INSN0(9,5) == BITS5(0,0,0,1,1)) // MVN
21539        && INSN0(3,0) == BITS4(1,1,1,1)
21540        && INSN1(15,15) == 0) {
21541       UInt rD = INSN1(11,8);
21542       if (!isBadRegT(rD)) {
21543          Bool   updC  = False;
21544          UInt   bS    = INSN0(4,4);
21545          Bool   isMVN = INSN0(5,5) == 1;
21546          UInt   imm32 = thumbExpandImm_from_I0_I1(&updC, insn0, insn1);
21547          IRTemp res   = newTemp(Ity_I32);
21548          assign(res, mkU32(isMVN ? ~imm32 : imm32));
21549          putIRegT(rD, mkexpr(res), condT);
21550          if (bS) {
21551             IRTemp oldV = newTemp(Ity_I32);
21552             IRTemp oldC = newTemp(Ity_I32);
21553             assign( oldV, mk_armg_calculate_flag_v() );
21554             assign( oldC, updC
21555                           ? mkU32((imm32 >> 31) & 1)
21556                           : mk_armg_calculate_flag_c() );
21557             setFlags_D1_D2_ND( ARMG_CC_OP_LOGIC, res, oldC, oldV,
21558                                condT );
21559          }
21560          DIP("%s%s.w r%u, #%u\n",
21561              isMVN ? "mvn" : "mov", bS ? "s" : "", rD, imm32);
21562          goto decode_success;
21563       }
21564    }
21565
21566    /* -------------- (T3) MOVW Rd, #imm16 -------------- */
21567    if (INSN0(15,11) == BITS5(1,1,1,1,0)
21568        && INSN0(9,4) == BITS6(1,0,0,1,0,0)
21569        && INSN1(15,15) == 0) {
21570       UInt rD = INSN1(11,8);
21571       if (!isBadRegT(rD)) {
21572          UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
21573                       | (INSN1(14,12) << 8) | INSN1(7,0);
21574          putIRegT(rD, mkU32(imm16), condT);
21575          DIP("movw r%u, #%u\n", rD, imm16);
21576          goto decode_success;
21577       }
21578    }
21579
21580    /* ---------------- MOVT Rd, #imm16 ---------------- */
21581    if (INSN0(15,11) == BITS5(1,1,1,1,0)
21582        && INSN0(9,4) == BITS6(1,0,1,1,0,0)
21583        && INSN1(15,15) == 0) {
21584       UInt rD = INSN1(11,8);
21585       if (!isBadRegT(rD)) {
21586          UInt imm16 = (INSN0(3,0) << 12) | (INSN0(10,10) << 11)
21587                       | (INSN1(14,12) << 8) | INSN1(7,0);
21588          IRTemp res = newTemp(Ity_I32);
21589          assign(res,
21590                 binop(Iop_Or32,
21591                       binop(Iop_And32, getIRegT(rD), mkU32(0xFFFF)),
21592                       mkU32(imm16 << 16)));
21593          putIRegT(rD, mkexpr(res), condT);
21594          DIP("movt r%u, #%u\n", rD, imm16);
21595          goto decode_success;
21596       }
21597    }
21598
21599    /* ---------------- LD/ST reg+/-#imm8 ---------------- */
21600    /* Loads and stores of the form:
21601          op  Rt, [Rn, #-imm8]      or
21602          op  Rt, [Rn], #+/-imm8    or
21603          op  Rt, [Rn, #+/-imm8]!
21604       where op is one of
21605          ldrb ldrh ldr  ldrsb ldrsh
21606          strb strh str
21607    */
21608    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0) && INSN1(11,11) == 1) {
21609       Bool   valid  = True;
21610       Bool   syned  = False;
21611       Bool   isST   = False;
21612       IRType ty     = Ity_I8;
21613       const HChar* nm = "???";
21614
21615       switch (INSN0(8,4)) {
21616          case BITS5(0,0,0,0,0):   // strb
21617             nm = "strb"; isST = True; break;
21618          case BITS5(0,0,0,0,1):   // ldrb
21619             nm = "ldrb"; break;
21620          case BITS5(1,0,0,0,1):   // ldrsb
21621             nm = "ldrsb"; syned = True; break;
21622          case BITS5(0,0,0,1,0):   // strh
21623             nm = "strh"; ty = Ity_I16; isST = True; break;
21624          case BITS5(0,0,0,1,1):   // ldrh
21625             nm = "ldrh"; ty = Ity_I16; break;
21626          case BITS5(1,0,0,1,1):   // ldrsh
21627             nm = "ldrsh"; ty = Ity_I16; syned = True; break;
21628          case BITS5(0,0,1,0,0):   // str
21629             nm = "str"; ty = Ity_I32; isST = True; break;
21630          case BITS5(0,0,1,0,1):
21631             nm = "ldr"; ty = Ity_I32; break;  // ldr
21632          default:
21633             valid = False; break;
21634       }
21635
21636       UInt rN      = INSN0(3,0);
21637       UInt rT      = INSN1(15,12);
21638       UInt bP      = INSN1(10,10);
21639       UInt bU      = INSN1(9,9);
21640       UInt bW      = INSN1(8,8);
21641       UInt imm8    = INSN1(7,0);
21642       Bool loadsPC = False;
21643
21644       if (valid) {
21645          if (bP == 1 && bU == 1 && bW == 0)
21646             valid = False;
21647          if (bP == 0 && bW == 0)
21648             valid = False;
21649          if (rN == 15)
21650             valid = False;
21651          if (bW == 1 && rN == rT)
21652             valid = False;
21653          if (ty == Ity_I8 || ty == Ity_I16) {
21654             if (isBadRegT(rT))
21655                valid = False;
21656          } else {
21657             /* ty == Ity_I32 */
21658             if (isST && rT == 15)
21659                valid = False;
21660             if (!isST && rT == 15)
21661                loadsPC = True;
21662          }
21663       }
21664
21665       if (valid) {
21666          // if it's a branch, it can't happen in the middle of an IT block
21667          // Also, if it is a branch, make it unconditional at this point.
21668          // Doing conditional branches in-line is too complex (for now)
21669          if (loadsPC) {
21670             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21671             // go uncond
21672             mk_skip_over_T32_if_cond_is_false(condT);
21673             condT = IRTemp_INVALID;
21674             // now uncond
21675          }
21676
21677          IRTemp preAddr = newTemp(Ity_I32);
21678          assign(preAddr, getIRegT(rN));
21679
21680          IRTemp postAddr = newTemp(Ity_I32);
21681          assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
21682                                 mkexpr(preAddr), mkU32(imm8)));
21683
21684          IRTemp transAddr = bP == 1 ? postAddr : preAddr;
21685
21686          if (isST) {
21687
21688             /* Store.  If necessary, update the base register before
21689                the store itself, so that the common idiom of "str rX,
21690                [sp, #-4]!" (store rX at sp-4, then do new sp = sp-4,
21691                a.k.a "push rX") doesn't cause Memcheck to complain
21692                that the access is below the stack pointer.  Also, not
21693                updating sp before the store confuses Valgrind's
21694                dynamic stack-extending logic.  So do it before the
21695                store.  Hence we need to snarf the store data before
21696                doing the basereg update. */
21697
21698             /* get hold of the data to be stored */
21699             IRTemp oldRt = newTemp(Ity_I32);
21700             assign(oldRt, getIRegT(rT));
21701
21702             /* Update Rn if necessary. */
21703             if (bW == 1) {
21704                vassert(rN != rT); // assured by validity check above
21705                putIRegT(rN, mkexpr(postAddr), condT);
21706             }
21707
21708             /* generate the transfer */
21709             IRExpr* data = NULL;
21710             switch (ty) {
21711                case Ity_I8:
21712                   data = unop(Iop_32to8, mkexpr(oldRt));
21713                   break;
21714                case Ity_I16:
21715                   data = unop(Iop_32to16, mkexpr(oldRt));
21716                   break;
21717                case Ity_I32:
21718                   data = mkexpr(oldRt);
21719                   break;
21720                default:
21721                   vassert(0);
21722             }
21723             storeGuardedLE(mkexpr(transAddr), data, condT);
21724
21725          } else {
21726
21727             /* Load. */
21728             IRTemp llOldRt = newTemp(Ity_I32);
21729             assign(llOldRt, llGetIReg(rT));
21730
21731             /* generate the transfer */
21732             IRTemp    newRt = newTemp(Ity_I32);
21733             IRLoadGOp widen = ILGop_INVALID;
21734             switch (ty) {
21735                case Ity_I8:
21736                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21737                case Ity_I16:
21738                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21739                case Ity_I32:
21740                   widen = ILGop_Ident32; break;
21741                default:
21742                   vassert(0);
21743             }
21744             loadGuardedLE(newRt, widen,
21745                           mkexpr(transAddr), mkexpr(llOldRt), condT);
21746             if (rT == 15) {
21747                vassert(loadsPC);
21748                /* We'll do the write to the PC just below */
21749             } else {
21750                vassert(!loadsPC);
21751                /* IRTemp_INVALID is OK here because in the case where
21752                   condT is false at run time, we're just putting the
21753                   old rT value back. */
21754                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21755             }
21756
21757             /* Update Rn if necessary. */
21758             if (bW == 1) {
21759                vassert(rN != rT); // assured by validity check above
21760                putIRegT(rN, mkexpr(postAddr), condT);
21761             }
21762
21763             if (loadsPC) {
21764                /* Presumably this is an interworking branch. */
21765                vassert(rN != 15); // assured by validity check above
21766                vassert(rT == 15);
21767                vassert(condT == IRTemp_INVALID); /* due to check above */
21768                llPutIReg(15, mkexpr(newRt));
21769                dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
21770                dres.whatNext    = Dis_StopHere;
21771             }
21772          }
21773
21774          if (bP == 1 && bW == 0) {
21775             DIP("%s.w r%u, [r%u, #%c%u]\n",
21776                 nm, rT, rN, bU ? '+' : '-', imm8);
21777          }
21778          else if (bP == 1 && bW == 1) {
21779             DIP("%s.w r%u, [r%u, #%c%u]!\n",
21780                 nm, rT, rN, bU ? '+' : '-', imm8);
21781          }
21782          else {
21783             vassert(bP == 0 && bW == 1);
21784             DIP("%s.w r%u, [r%u], #%c%u\n",
21785                 nm, rT, rN, bU ? '+' : '-', imm8);
21786          }
21787
21788          goto decode_success;
21789       }
21790    }
21791
21792    /* ------------- LD/ST reg+(reg<<imm2) ------------- */
21793    /* Loads and stores of the form:
21794          op  Rt, [Rn, Rm, LSL #imm8]
21795       where op is one of
21796          ldrb ldrh ldr  ldrsb ldrsh
21797          strb strh str
21798    */
21799    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)
21800        && INSN1(11,6) == BITS6(0,0,0,0,0,0)) {
21801       Bool   valid  = True;
21802       Bool   syned  = False;
21803       Bool   isST   = False;
21804       IRType ty     = Ity_I8;
21805       const HChar* nm = "???";
21806
21807       switch (INSN0(8,4)) {
21808          case BITS5(0,0,0,0,0):   // strb
21809             nm = "strb"; isST = True; break;
21810          case BITS5(0,0,0,0,1):   // ldrb
21811             nm = "ldrb"; break;
21812          case BITS5(1,0,0,0,1):   // ldrsb
21813             nm = "ldrsb"; syned = True; break;
21814          case BITS5(0,0,0,1,0):   // strh
21815             nm = "strh"; ty = Ity_I16; isST = True; break;
21816          case BITS5(0,0,0,1,1):   // ldrh
21817             nm = "ldrh"; ty = Ity_I16; break;
21818          case BITS5(1,0,0,1,1):   // ldrsh
21819             nm = "ldrsh"; ty = Ity_I16; syned = True; break;
21820          case BITS5(0,0,1,0,0):   // str
21821             nm = "str"; ty = Ity_I32; isST = True; break;
21822          case BITS5(0,0,1,0,1):
21823             nm = "ldr"; ty = Ity_I32; break;  // ldr
21824          default:
21825             valid = False; break;
21826       }
21827
21828       UInt rN      = INSN0(3,0);
21829       UInt rM      = INSN1(3,0);
21830       UInt rT      = INSN1(15,12);
21831       UInt imm2    = INSN1(5,4);
21832       Bool loadsPC = False;
21833
21834       if (ty == Ity_I8 || ty == Ity_I16) {
21835          /* all 8- and 16-bit load and store cases have the
21836             same exclusion set. */
21837          if (rN == 15 || isBadRegT(rT) || isBadRegT(rM))
21838             valid = False;
21839       } else {
21840          vassert(ty == Ity_I32);
21841          if (rN == 15 || isBadRegT(rM))
21842             valid = False;
21843          if (isST && rT == 15)
21844             valid = False;
21845          /* If it is a load and rT is 15, that's only allowable if we
21846             not in an IT block, or are the last in it.  Need to insert
21847             a dynamic check for that. */
21848          if (!isST && rT == 15)
21849             loadsPC = True;
21850       }
21851
21852       if (valid) {
21853          // if it's a branch, it can't happen in the middle of an IT block
21854          // Also, if it is a branch, make it unconditional at this point.
21855          // Doing conditional branches in-line is too complex (for now)
21856          if (loadsPC) {
21857             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
21858             // go uncond
21859             mk_skip_over_T32_if_cond_is_false(condT);
21860             condT = IRTemp_INVALID;
21861             // now uncond
21862          }
21863
21864          IRTemp transAddr = newTemp(Ity_I32);
21865          assign(transAddr,
21866                 binop( Iop_Add32,
21867                        getIRegT(rN),
21868                        binop(Iop_Shl32, getIRegT(rM), mkU8(imm2)) ));
21869
21870          if (isST) {
21871
21872             /* get hold of the data to be stored */
21873             IRTemp oldRt = newTemp(Ity_I32);
21874             assign(oldRt, getIRegT(rT));
21875
21876             /* generate the transfer */
21877             IRExpr* data = NULL;
21878             switch (ty) {
21879                case Ity_I8:
21880                   data = unop(Iop_32to8, mkexpr(oldRt));
21881                   break;
21882                case Ity_I16:
21883                   data = unop(Iop_32to16, mkexpr(oldRt));
21884                   break;
21885               case Ity_I32:
21886                   data = mkexpr(oldRt);
21887                   break;
21888               default:
21889                  vassert(0);
21890             }
21891             storeGuardedLE(mkexpr(transAddr), data, condT);
21892
21893          } else {
21894
21895             /* Load. */
21896             IRTemp llOldRt = newTemp(Ity_I32);
21897             assign(llOldRt, llGetIReg(rT));
21898
21899             /* generate the transfer */
21900             IRTemp    newRt = newTemp(Ity_I32);
21901             IRLoadGOp widen = ILGop_INVALID;
21902             switch (ty) {
21903                case Ity_I8:
21904                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
21905                case Ity_I16:
21906                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
21907                case Ity_I32:
21908                   widen = ILGop_Ident32; break;
21909                default:
21910                   vassert(0);
21911             }
21912             loadGuardedLE(newRt, widen,
21913                           mkexpr(transAddr), mkexpr(llOldRt), condT);
21914
21915             if (rT == 15) {
21916                vassert(loadsPC);
21917                /* We'll do the write to the PC just below */
21918             } else {
21919                vassert(!loadsPC);
21920                /* IRTemp_INVALID is OK here because in the case where
21921                   condT is false at run time, we're just putting the
21922                   old rT value back. */
21923                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
21924             }
21925
21926             if (loadsPC) {
21927                /* Presumably this is an interworking branch. */
21928                vassert(rN != 15); // assured by validity check above
21929                vassert(rT == 15);
21930                vassert(condT == IRTemp_INVALID); /* due to check above */
21931                llPutIReg(15, mkexpr(newRt));
21932                dres.jk_StopHere = Ijk_Boring;  /* or _Ret ? */
21933                dres.whatNext    = Dis_StopHere;
21934             }
21935          }
21936
21937          DIP("%s.w r%u, [r%u, r%u, LSL #%u]\n",
21938              nm, rT, rN, rM, imm2);
21939
21940          goto decode_success;
21941       }
21942    }
21943
21944    /* --------------- LD/ST reg+imm12 --------------- */
21945    /* Loads and stores of the form:
21946          op  Rt, [Rn, #+-imm12]
21947       where op is one of
21948          ldrb ldrh ldr  ldrsb ldrsh
21949          strb strh str
21950    */
21951    if (INSN0(15,9) == BITS7(1,1,1,1,1,0,0)) {
21952       Bool   valid  = True;
21953       Bool   syned  = INSN0(8,8) == 1;
21954       Bool   isST   = False;
21955       IRType ty     = Ity_I8;
21956       UInt   bU     = INSN0(7,7); // 1: +imm   0: -imm
21957                                   // -imm is only supported by literal versions
21958       const HChar* nm = "???";
21959
21960       switch (INSN0(6,4)) {
21961          case BITS3(0,0,0):   // strb
21962             nm = "strb"; isST = True; break;
21963          case BITS3(0,0,1):   // ldrb
21964             nm = syned ? "ldrsb" : "ldrb"; break;
21965          case BITS3(0,1,0):   // strh
21966             nm = "strh"; ty = Ity_I16; isST = True; break;
21967          case BITS3(0,1,1):   // ldrh
21968             nm = syned ? "ldrsh" : "ldrh"; ty = Ity_I16; break;
21969          case BITS3(1,0,0):   // str
21970             nm = "str"; ty = Ity_I32; isST = True; break;
21971          case BITS3(1,0,1):
21972             nm = "ldr"; ty = Ity_I32; break;  // ldr
21973          default:
21974             valid = False; break;
21975       }
21976
21977       UInt rN      = INSN0(3,0);
21978       UInt rT      = INSN1(15,12);
21979       UInt imm12   = INSN1(11,0);
21980       Bool loadsPC = False;
21981
21982       if (rN != 15 && bU == 0) {
21983          // only pc supports #-imm12
21984          valid = False;
21985       }
21986
21987       if (isST) {
21988          if (syned) valid = False;
21989          if (rN == 15 || rT == 15)
21990             valid = False;
21991       } else {
21992          /* For a 32-bit load, rT == 15 is only allowable if we are not
21993             in an IT block, or are the last in it.  Need to insert
21994             a dynamic check for that.  Also, in this particular
21995             case, rN == 15 is allowable.  In this case however, the
21996             value obtained for rN is (apparently)
21997             "word-align(address of current insn + 4)". */
21998          if (rT == 15) {
21999             if (ty == Ity_I32)
22000                loadsPC = True;
22001             else // Can't do it for B/H loads
22002                valid = False;
22003          }
22004       }
22005
22006       if (valid) {
22007          // if it's a branch, it can't happen in the middle of an IT block
22008          // Also, if it is a branch, make it unconditional at this point.
22009          // Doing conditional branches in-line is too complex (for now)
22010          if (loadsPC) {
22011             gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
22012             // go uncond
22013             mk_skip_over_T32_if_cond_is_false(condT);
22014             condT = IRTemp_INVALID;
22015             // now uncond
22016          }
22017
22018          IRTemp rNt = newTemp(Ity_I32);
22019          if (rN == 15) {
22020             vassert(!isST);
22021             assign(rNt, binop(Iop_And32, getIRegT(15), mkU32(~3)));
22022          } else {
22023             assign(rNt, getIRegT(rN));
22024          }
22025
22026          IRTemp transAddr = newTemp(Ity_I32);
22027          assign(transAddr,
22028                 binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
22029                       mkexpr(rNt), mkU32(imm12)));
22030
22031          IRTemp oldRt = newTemp(Ity_I32);
22032          assign(oldRt, getIRegT(rT));
22033
22034          IRTemp llOldRt = newTemp(Ity_I32);
22035          assign(llOldRt, llGetIReg(rT));
22036
22037          if (isST) {
22038             IRExpr* data = NULL;
22039             switch (ty) {
22040                case Ity_I8:
22041                   data = unop(Iop_32to8, mkexpr(oldRt));
22042                   break;
22043                case Ity_I16:
22044                   data = unop(Iop_32to16, mkexpr(oldRt));
22045                   break;
22046               case Ity_I32:
22047                   data = mkexpr(oldRt);
22048                   break;
22049               default:
22050                  vassert(0);
22051             }
22052             storeGuardedLE(mkexpr(transAddr), data, condT);
22053          } else {
22054             IRTemp    newRt = newTemp(Ity_I32);
22055             IRLoadGOp widen = ILGop_INVALID;
22056             switch (ty) {
22057                case Ity_I8:
22058                   widen = syned ? ILGop_8Sto32 : ILGop_8Uto32; break;
22059                case Ity_I16:
22060                   widen = syned ? ILGop_16Sto32 : ILGop_16Uto32; break;
22061                case Ity_I32:
22062                   widen = ILGop_Ident32; break;
22063                default:
22064                   vassert(0);
22065             }
22066             loadGuardedLE(newRt, widen,
22067                           mkexpr(transAddr), mkexpr(llOldRt), condT);
22068             if (rT == 15) {
22069                vassert(loadsPC);
22070                /* We'll do the write to the PC just below */
22071             } else {
22072                vassert(!loadsPC);
22073                /* IRTemp_INVALID is OK here because in the case where
22074                   condT is false at run time, we're just putting the
22075                   old rT value back. */
22076                putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
22077             }
22078
22079             if (loadsPC) {
22080                /* Presumably this is an interworking branch. */
22081                vassert(rT == 15);
22082                vassert(condT == IRTemp_INVALID); /* due to check above */
22083                llPutIReg(15, mkexpr(newRt));
22084                dres.jk_StopHere = Ijk_Boring;
22085                dres.whatNext    = Dis_StopHere;
22086             }
22087          }
22088
22089          DIP("%s.w r%u, [r%u, +#%u]\n", nm, rT, rN, imm12);
22090
22091          goto decode_success;
22092       }
22093    }
22094
22095    /* -------------- LDRD/STRD reg+/-#imm8 -------------- */
22096    /* Doubleword loads and stores of the form:
22097          ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]    or
22098          ldrd/strd  Rt, Rt2, [Rn], #+/-imm8    or
22099          ldrd/strd  Rt, Rt2, [Rn, #+/-imm8]!
22100    */
22101    if (INSN0(15,9) == BITS7(1,1,1,0,1,0,0) && INSN0(6,6) == 1) {
22102       UInt bP   = INSN0(8,8);
22103       UInt bU   = INSN0(7,7);
22104       UInt bW   = INSN0(5,5);
22105       UInt bL   = INSN0(4,4);  // 1: load  0: store
22106       UInt rN   = INSN0(3,0);
22107       UInt rT   = INSN1(15,12);
22108       UInt rT2  = INSN1(11,8);
22109       UInt imm8 = INSN1(7,0);
22110
22111       Bool valid = True;
22112       if (bP == 0 && bW == 0)                 valid = False;
22113       if (bW == 1 && (rN == rT || rN == rT2)) valid = False;
22114       if (isBadRegT(rT) || isBadRegT(rT2))    valid = False;
22115       if (bL == 1 && rT == rT2)               valid = False;
22116       /* It's OK to use PC as the base register only in the
22117          following case: ldrd Rt, Rt2, [PC, #+/-imm8] */
22118       if (rN == 15 && (bL == 0/*store*/
22119                        || bW == 1/*wb*/))     valid = False;
22120
22121       if (valid) {
22122          IRTemp preAddr = newTemp(Ity_I32);
22123          assign(preAddr, 15 == rN
22124                            ? binop(Iop_And32, getIRegT(15), mkU32(~3U))
22125                            : getIRegT(rN));
22126
22127          IRTemp postAddr = newTemp(Ity_I32);
22128          assign(postAddr, binop(bU == 1 ? Iop_Add32 : Iop_Sub32,
22129                                 mkexpr(preAddr), mkU32(imm8 << 2)));
22130
22131          IRTemp transAddr = bP == 1 ? postAddr : preAddr;
22132
22133          /* For almost all cases, we do the writeback after the transfers.
22134             However, that leaves the stack "uncovered" in cases like:
22135                strd    rD, [sp, #-8]
22136                strd    rD, [sp, #-16]
22137             In which case, do the writeback to SP now, instead of later.
22138             This is bad in that it makes the insn non-restartable if the
22139             accesses fault, but at least keeps Memcheck happy. */
22140          Bool writeback_already_done = False;
22141          if (bL == 0/*store*/ && bW == 1/*wb*/
22142              && rN == 13 && rN != rT && rN != rT2
22143              && bU == 0/*minus*/
22144              && ((imm8 << 2) == 8 || (imm8 << 2) == 16)) {
22145             putIRegT(rN, mkexpr(postAddr), condT);
22146             writeback_already_done = True;
22147          }
22148
22149          if (bL == 0) {
22150             IRTemp oldRt  = newTemp(Ity_I32);
22151             IRTemp oldRt2 = newTemp(Ity_I32);
22152             assign(oldRt,  getIRegT(rT));
22153             assign(oldRt2, getIRegT(rT2));
22154             storeGuardedLE( mkexpr(transAddr),
22155                             mkexpr(oldRt), condT );
22156             storeGuardedLE( binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
22157                             mkexpr(oldRt2), condT );
22158          } else {
22159             IRTemp oldRt  = newTemp(Ity_I32);
22160             IRTemp oldRt2 = newTemp(Ity_I32);
22161             IRTemp newRt  = newTemp(Ity_I32);
22162             IRTemp newRt2 = newTemp(Ity_I32);
22163             assign(oldRt,  llGetIReg(rT));
22164             assign(oldRt2, llGetIReg(rT2));
22165             loadGuardedLE( newRt, ILGop_Ident32,
22166                            mkexpr(transAddr),
22167                            mkexpr(oldRt), condT );
22168             loadGuardedLE( newRt2, ILGop_Ident32,
22169                            binop(Iop_Add32, mkexpr(transAddr), mkU32(4)),
22170                            mkexpr(oldRt2), condT );
22171             /* Put unconditionally, since we already switched on the condT
22172                in the guarded loads. */
22173             putIRegT(rT,  mkexpr(newRt),  IRTemp_INVALID);
22174             putIRegT(rT2, mkexpr(newRt2), IRTemp_INVALID);
22175          }
22176
22177          if (bW == 1 && !writeback_already_done) {
22178             putIRegT(rN, mkexpr(postAddr), condT);
22179          }
22180
22181          const HChar* nm = bL ? "ldrd" : "strd";
22182
22183          if (bP == 1 && bW == 0) {
22184             DIP("%s.w r%u, r%u, [r%u, #%c%u]\n",
22185                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22186          }
22187          else if (bP == 1 && bW == 1) {
22188             DIP("%s.w r%u, r%u, [r%u, #%c%u]!\n",
22189                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22190          }
22191          else {
22192             vassert(bP == 0 && bW == 1);
22193             DIP("%s.w r%u, r%u, [r%u], #%c%u\n",
22194                 nm, rT, rT2, rN, bU ? '+' : '-', imm8 << 2);
22195          }
22196
22197          goto decode_success;
22198       }
22199    }
22200
22201    /* -------------- (T3) Bcond.W label -------------- */
22202    /* This variant carries its own condition, so can't be part of an
22203       IT block ... */
22204    if (INSN0(15,11) == BITS5(1,1,1,1,0)
22205        && INSN1(15,14) == BITS2(1,0)
22206        && INSN1(12,12) == 0) {
22207       UInt cond = INSN0(9,6);
22208       if (cond != ARMCondAL && cond != ARMCondNV) {
22209          UInt uimm21
22210             =   (INSN0(10,10) << (1 + 1 + 6 + 11 + 1))
22211               | (INSN1(11,11) << (1 + 6 + 11 + 1))
22212               | (INSN1(13,13) << (6 + 11 + 1))
22213               | (INSN0(5,0)   << (11 + 1))
22214               | (INSN1(10,0)  << 1);
22215          uimm21 <<= 11;
22216          Int simm21 = (Int)uimm21;
22217          simm21 >>= 11;
22218
22219          vassert(0 == (guest_R15_curr_instr_notENC & 1));
22220          UInt dst = simm21 + guest_R15_curr_instr_notENC + 4;
22221
22222          /* Not allowed in an IT block; SIGILL if so. */
22223          gen_SIGILL_T_if_in_ITBlock(old_itstate, new_itstate);
22224
22225          IRTemp kondT = newTemp(Ity_I32);
22226          assign( kondT, mk_armg_calculate_condition(cond) );
22227          stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
22228                             Ijk_Boring,
22229                             IRConst_U32(dst | 1/*CPSR.T*/),
22230                             OFFB_R15T ));
22231          llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
22232                               | 1 /*CPSR.T*/ ));
22233          dres.jk_StopHere = Ijk_Boring;
22234          dres.whatNext    = Dis_StopHere;
22235          DIP("b%s.w 0x%x\n", nCC(cond), dst);
22236          goto decode_success;
22237       }
22238    }
22239
22240    /* ---------------- (T4) B.W label ---------------- */
22241    /* ... whereas this variant doesn't carry its own condition, so it
22242       has to be either unconditional or the conditional by virtue of
22243       being the last in an IT block.  The upside is that there's 4
22244       more bits available for the jump offset, so it has a 16-times
22245       greater branch range than the T3 variant. */
22246    if (INSN0(15,11) == BITS5(1,1,1,1,0)
22247        && INSN1(15,14) == BITS2(1,0)
22248        && INSN1(12,12) == 1) {
22249       if (1) {
22250          UInt bS  = INSN0(10,10);
22251          UInt bJ1 = INSN1(13,13);
22252          UInt bJ2 = INSN1(11,11);
22253          UInt bI1 = 1 ^ (bJ1 ^ bS);
22254          UInt bI2 = 1 ^ (bJ2 ^ bS);
22255          UInt uimm25
22256             =   (bS          << (1 + 1 + 10 + 11 + 1))
22257               | (bI1         << (1 + 10 + 11 + 1))
22258               | (bI2         << (10 + 11 + 1))
22259               | (INSN0(9,0)  << (11 + 1))
22260               | (INSN1(10,0) << 1);
22261          uimm25 <<= 7;
22262          Int simm25 = (Int)uimm25;
22263          simm25 >>= 7;
22264
22265          vassert(0 == (guest_R15_curr_instr_notENC & 1));
22266          UInt dst = simm25 + guest_R15_curr_instr_notENC + 4;
22267
22268          /* If in an IT block, must be the last insn. */
22269          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
22270
22271          // go uncond
22272          mk_skip_over_T32_if_cond_is_false(condT);
22273          condT = IRTemp_INVALID;
22274          // now uncond
22275
22276          // branch to dst
22277          llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
22278          dres.jk_StopHere = Ijk_Boring;
22279          dres.whatNext    = Dis_StopHere;
22280          DIP("b.w 0x%x\n", dst);
22281          goto decode_success;
22282       }
22283    }
22284
22285    /* ------------------ TBB, TBH ------------------ */
22286    if (INSN0(15,4) == 0xE8D && INSN1(15,5) == 0x780) {
22287       UInt rN = INSN0(3,0);
22288       UInt rM = INSN1(3,0);
22289       UInt bH = INSN1(4,4);
22290       if (bH/*ATC*/ || (rN != 13 && !isBadRegT(rM))) {
22291          /* Must be last or not-in IT block */
22292          gen_SIGILL_T_if_in_but_NLI_ITBlock(old_itstate, new_itstate);
22293          /* Go uncond */
22294          mk_skip_over_T32_if_cond_is_false(condT);
22295          condT = IRTemp_INVALID;
22296
22297          IRExpr* ea
22298              = binop(Iop_Add32,
22299                      getIRegT(rN),
22300                      bH ? binop(Iop_Shl32, getIRegT(rM), mkU8(1))
22301                         : getIRegT(rM));
22302
22303          IRTemp delta = newTemp(Ity_I32);
22304          if (bH) {
22305             assign(delta, unop(Iop_16Uto32, loadLE(Ity_I16, ea)));
22306          } else {
22307             assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
22308          }
22309
22310          llPutIReg(
22311             15,
22312             binop(Iop_Or32,
22313                   binop(Iop_Add32,
22314                         getIRegT(15),
22315                         binop(Iop_Shl32, mkexpr(delta), mkU8(1))
22316                   ),
22317                   mkU32(1)
22318          ));
22319          dres.jk_StopHere = Ijk_Boring;
22320          dres.whatNext    = Dis_StopHere;
22321          DIP("tb%c [r%u, r%u%s]\n",
22322              bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
22323          goto decode_success;
22324       }
22325    }
22326
22327    /* ------------------ UBFX ------------------ */
22328    /* ------------------ SBFX ------------------ */
22329    /* There's also ARM versions of same, but it doesn't seem worth the
22330       hassle to common up the handling (it's only a couple of C
22331       statements). */
22332    if ((INSN0(15,4) == 0xF3C // UBFX
22333         || INSN0(15,4) == 0xF34) // SBFX
22334        && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
22335       UInt rN  = INSN0(3,0);
22336       UInt rD  = INSN1(11,8);
22337       UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
22338       UInt wm1 = INSN1(4,0);
22339       UInt msb =  lsb + wm1;
22340       if (!isBadRegT(rD) && !isBadRegT(rN) && msb <= 31) {
22341          Bool   isU  = INSN0(15,4) == 0xF3C;
22342          IRTemp src  = newTemp(Ity_I32);
22343          IRTemp tmp  = newTemp(Ity_I32);
22344          IRTemp res  = newTemp(Ity_I32);
22345          UInt   mask = ((1 << wm1) - 1) + (1 << wm1);
22346          vassert(msb >= 0 && msb <= 31);
22347          vassert(mask != 0); // guaranteed by msb being in 0 .. 31 inclusive
22348
22349          assign(src, getIRegT(rN));
22350          assign(tmp, binop(Iop_And32,
22351                            binop(Iop_Shr32, mkexpr(src), mkU8(lsb)),
22352                            mkU32(mask)));
22353          assign(res, binop(isU ? Iop_Shr32 : Iop_Sar32,
22354                            binop(Iop_Shl32, mkexpr(tmp), mkU8(31-wm1)),
22355                            mkU8(31-wm1)));
22356
22357          putIRegT(rD, mkexpr(res), condT);
22358
22359          DIP("%s r%u, r%u, #%u, #%u\n",
22360              isU ? "ubfx" : "sbfx", rD, rN, lsb, wm1 + 1);
22361          goto decode_success;
22362       }
22363    }
22364
22365    /* ------------------ UXTB ------------------ */
22366    /* ------------------ UXTH ------------------ */
22367    /* ------------------ SXTB ------------------ */
22368    /* ------------------ SXTH ------------------ */
22369    /* ----------------- UXTB16 ----------------- */
22370    /* ----------------- SXTB16 ----------------- */
22371    /* FIXME: this is an exact duplicate of the ARM version.  They
22372       should be commoned up. */
22373    if ((INSN0(15,0) == 0xFA5F     // UXTB
22374         || INSN0(15,0) == 0xFA1F  // UXTH
22375         || INSN0(15,0) == 0xFA4F  // SXTB
22376         || INSN0(15,0) == 0xFA0F  // SXTH
22377         || INSN0(15,0) == 0xFA3F  // UXTB16
22378         || INSN0(15,0) == 0xFA2F) // SXTB16
22379        && INSN1(15,12) == BITS4(1,1,1,1)
22380        && INSN1(7,6) == BITS2(1,0)) {
22381       UInt rD = INSN1(11,8);
22382       UInt rM = INSN1(3,0);
22383       UInt rot = INSN1(5,4);
22384       if (!isBadRegT(rD) && !isBadRegT(rM)) {
22385          const HChar* nm = "???";
22386          IRTemp srcT = newTemp(Ity_I32);
22387          IRTemp rotT = newTemp(Ity_I32);
22388          IRTemp dstT = newTemp(Ity_I32);
22389          assign(srcT, getIRegT(rM));
22390          assign(rotT, genROR32(srcT, 8 * rot));
22391          switch (INSN0(15,0)) {
22392             case 0xFA5F: // UXTB
22393                nm = "uxtb";
22394                assign(dstT, unop(Iop_8Uto32,
22395                                  unop(Iop_32to8, mkexpr(rotT))));
22396                break;
22397             case 0xFA1F: // UXTH
22398                nm = "uxth";
22399                assign(dstT, unop(Iop_16Uto32,
22400                                  unop(Iop_32to16, mkexpr(rotT))));
22401                break;
22402             case 0xFA4F: // SXTB
22403                nm = "sxtb";
22404                assign(dstT, unop(Iop_8Sto32,
22405                                  unop(Iop_32to8, mkexpr(rotT))));
22406                break;
22407             case 0xFA0F: // SXTH
22408                nm = "sxth";
22409                assign(dstT, unop(Iop_16Sto32,
22410                                  unop(Iop_32to16, mkexpr(rotT))));
22411                break;
22412             case 0xFA3F: // UXTB16
22413                nm = "uxtb16";
22414                assign(dstT, binop(Iop_And32, mkexpr(rotT),
22415                                              mkU32(0x00FF00FF)));
22416                break;
22417             case 0xFA2F: { // SXTB16
22418                nm = "sxtb16";
22419                IRTemp lo32 = newTemp(Ity_I32);
22420                IRTemp hi32 = newTemp(Ity_I32);
22421                assign(lo32, binop(Iop_And32, mkexpr(rotT), mkU32(0xFF)));
22422                assign(hi32, binop(Iop_Shr32, mkexpr(rotT), mkU8(16)));
22423                assign(
22424                   dstT,
22425                   binop(Iop_Or32,
22426                         binop(Iop_And32,
22427                               unop(Iop_8Sto32,
22428                                    unop(Iop_32to8, mkexpr(lo32))),
22429                               mkU32(0xFFFF)),
22430                         binop(Iop_Shl32,
22431                               unop(Iop_8Sto32,
22432                                    unop(Iop_32to8, mkexpr(hi32))),
22433                               mkU8(16))
22434                ));
22435                break;
22436             }
22437             default:
22438                vassert(0);
22439          }
22440          putIRegT(rD, mkexpr(dstT), condT);
22441          DIP("%s r%u, r%u, ror #%u\n", nm, rD, rM, 8 * rot);
22442          goto decode_success;
22443       }
22444    }
22445
22446    /* -------------- MUL.W Rd, Rn, Rm -------------- */
22447    if (INSN0(15,4) == 0xFB0
22448        && (INSN1(15,0) & 0xF0F0) == 0xF000) {
22449       UInt rN = INSN0(3,0);
22450       UInt rD = INSN1(11,8);
22451       UInt rM = INSN1(3,0);
22452       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22453          IRTemp res = newTemp(Ity_I32);
22454          assign(res, binop(Iop_Mul32, getIRegT(rN), getIRegT(rM)));
22455          putIRegT(rD, mkexpr(res), condT);
22456          DIP("mul.w r%u, r%u, r%u\n", rD, rN, rM);
22457          goto decode_success;
22458       }
22459    }
22460
22461    /* -------------- SDIV.W Rd, Rn, Rm -------------- */
22462    if (INSN0(15,4) == 0xFB9
22463        && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
22464       UInt rN = INSN0(3,0);
22465       UInt rD = INSN1(11,8);
22466       UInt rM = INSN1(3,0);
22467       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22468          IRTemp res  = newTemp(Ity_I32);
22469          IRTemp argL = newTemp(Ity_I32);
22470          IRTemp argR = newTemp(Ity_I32);
22471          assign(argL, getIRegT(rN));
22472          assign(argR, getIRegT(rM));
22473          assign(res, binop(Iop_DivS32, mkexpr(argL), mkexpr(argR)));
22474          putIRegT(rD, mkexpr(res), condT);
22475          DIP("sdiv.w r%u, r%u, r%u\n", rD, rN, rM);
22476          goto decode_success;
22477       }
22478    }
22479
22480    /* -------------- UDIV.W Rd, Rn, Rm -------------- */
22481    if (INSN0(15,4) == 0xFBB
22482        && (INSN1(15,0) & 0xF0F0) == 0xF0F0) {
22483       UInt rN = INSN0(3,0);
22484       UInt rD = INSN1(11,8);
22485       UInt rM = INSN1(3,0);
22486       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22487          IRTemp res  = newTemp(Ity_I32);
22488          IRTemp argL = newTemp(Ity_I32);
22489          IRTemp argR = newTemp(Ity_I32);
22490          assign(argL, getIRegT(rN));
22491          assign(argR, getIRegT(rM));
22492          assign(res, binop(Iop_DivU32, mkexpr(argL), mkexpr(argR)));
22493          putIRegT(rD, mkexpr(res), condT);
22494          DIP("udiv.w r%u, r%u, r%u\n", rD, rN, rM);
22495          goto decode_success;
22496       }
22497    }
22498
22499    /* ------------------ {U,S}MULL ------------------ */
22500    if ((INSN0(15,4) == 0xFB8 || INSN0(15,4) == 0xFBA)
22501        && INSN1(7,4) == BITS4(0,0,0,0)) {
22502       UInt isU  = INSN0(5,5);
22503       UInt rN   = INSN0(3,0);
22504       UInt rDlo = INSN1(15,12);
22505       UInt rDhi = INSN1(11,8);
22506       UInt rM   = INSN1(3,0);
22507       if (!isBadRegT(rDhi) && !isBadRegT(rDlo)
22508           && !isBadRegT(rN) && !isBadRegT(rM) && rDlo != rDhi) {
22509          IRTemp res   = newTemp(Ity_I64);
22510          assign(res, binop(isU ? Iop_MullU32 : Iop_MullS32,
22511                            getIRegT(rN), getIRegT(rM)));
22512          putIRegT( rDhi, unop(Iop_64HIto32, mkexpr(res)), condT );
22513          putIRegT( rDlo, unop(Iop_64to32, mkexpr(res)), condT );
22514          DIP("%cmull r%u, r%u, r%u, r%u\n",
22515              isU ? 'u' : 's', rDlo, rDhi, rN, rM);
22516          goto decode_success;
22517       }
22518    }
22519
22520    /* ------------------ ML{A,S} ------------------ */
22521    if (INSN0(15,4) == 0xFB0
22522        && (   INSN1(7,4) == BITS4(0,0,0,0)    // MLA
22523            || INSN1(7,4) == BITS4(0,0,0,1))) { // MLS
22524       UInt rN = INSN0(3,0);
22525       UInt rA = INSN1(15,12);
22526       UInt rD = INSN1(11,8);
22527       UInt rM = INSN1(3,0);
22528       if (!isBadRegT(rD) && !isBadRegT(rN)
22529           && !isBadRegT(rM) && !isBadRegT(rA)) {
22530          Bool   isMLA = INSN1(7,4) == BITS4(0,0,0,0);
22531          IRTemp res   = newTemp(Ity_I32);
22532          assign(res,
22533                 binop(isMLA ? Iop_Add32 : Iop_Sub32,
22534                       getIRegT(rA),
22535                       binop(Iop_Mul32, getIRegT(rN), getIRegT(rM))));
22536          putIRegT(rD, mkexpr(res), condT);
22537          DIP("%s r%u, r%u, r%u, r%u\n",
22538              isMLA ? "mla" : "mls", rD, rN, rM, rA);
22539          goto decode_success;
22540       }
22541    }
22542
22543    /* ------------------ (T3) ADR ------------------ */
22544    if ((INSN0(15,0) == 0xF20F || INSN0(15,0) == 0xF60F)
22545        && INSN1(15,15) == 0) {
22546       /* rD = align4(PC) + imm32 */
22547       UInt rD = INSN1(11,8);
22548       if (!isBadRegT(rD)) {
22549          UInt imm32 = (INSN0(10,10) << 11)
22550                       | (INSN1(14,12) << 8) | INSN1(7,0);
22551          putIRegT(rD, binop(Iop_Add32,
22552                             binop(Iop_And32, getIRegT(15), mkU32(~3U)),
22553                             mkU32(imm32)),
22554                       condT);
22555          DIP("add r%u, pc, #%u\n", rD, imm32);
22556          goto decode_success;
22557       }
22558    }
22559
22560    /* ----------------- (T1) UMLAL ----------------- */
22561    /* ----------------- (T1) SMLAL ----------------- */
22562    if ((INSN0(15,4) == 0xFBE // UMLAL
22563         || INSN0(15,4) == 0xFBC) // SMLAL
22564        && INSN1(7,4) == BITS4(0,0,0,0)) {
22565       UInt rN   = INSN0(3,0);
22566       UInt rDlo = INSN1(15,12);
22567       UInt rDhi = INSN1(11,8);
22568       UInt rM   = INSN1(3,0);
22569       if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
22570           && !isBadRegT(rM) && rDhi != rDlo) {
22571          Bool   isS   = INSN0(15,4) == 0xFBC;
22572          IRTemp argL  = newTemp(Ity_I32);
22573          IRTemp argR  = newTemp(Ity_I32);
22574          IRTemp old   = newTemp(Ity_I64);
22575          IRTemp res   = newTemp(Ity_I64);
22576          IRTemp resHi = newTemp(Ity_I32);
22577          IRTemp resLo = newTemp(Ity_I32);
22578          IROp   mulOp = isS ? Iop_MullS32 : Iop_MullU32;
22579          assign( argL, getIRegT(rM));
22580          assign( argR, getIRegT(rN));
22581          assign( old, binop(Iop_32HLto64, getIRegT(rDhi), getIRegT(rDlo)) );
22582          assign( res, binop(Iop_Add64,
22583                             mkexpr(old),
22584                             binop(mulOp, mkexpr(argL), mkexpr(argR))) );
22585          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
22586          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
22587          putIRegT( rDhi, mkexpr(resHi), condT );
22588          putIRegT( rDlo, mkexpr(resLo), condT );
22589          DIP("%cmlal r%u, r%u, r%u, r%u\n",
22590              isS ? 's' : 'u', rDlo, rDhi, rN, rM);
22591          goto decode_success;
22592       }
22593    }
22594
22595    /* ------------------ (T1) UMAAL ------------------ */
22596    if (INSN0(15,4) == 0xFBE && INSN1(7,4) == BITS4(0,1,1,0)) {
22597       UInt rN   = INSN0(3,0);
22598       UInt rDlo = INSN1(15,12);
22599       UInt rDhi = INSN1(11,8);
22600       UInt rM   = INSN1(3,0);
22601       if (!isBadRegT(rDlo) && !isBadRegT(rDhi) && !isBadRegT(rN)
22602           && !isBadRegT(rM) && rDhi != rDlo) {
22603          IRTemp argN   = newTemp(Ity_I32);
22604          IRTemp argM   = newTemp(Ity_I32);
22605          IRTemp argDhi = newTemp(Ity_I32);
22606          IRTemp argDlo = newTemp(Ity_I32);
22607          IRTemp res    = newTemp(Ity_I64);
22608          IRTemp resHi  = newTemp(Ity_I32);
22609          IRTemp resLo  = newTemp(Ity_I32);
22610          assign( argN,   getIRegT(rN) );
22611          assign( argM,   getIRegT(rM) );
22612          assign( argDhi, getIRegT(rDhi) );
22613          assign( argDlo, getIRegT(rDlo) );
22614          assign( res,
22615                  binop(Iop_Add64,
22616                        binop(Iop_Add64,
22617                              binop(Iop_MullU32, mkexpr(argN), mkexpr(argM)),
22618                              unop(Iop_32Uto64, mkexpr(argDhi))),
22619                        unop(Iop_32Uto64, mkexpr(argDlo))) );
22620          assign( resHi, unop(Iop_64HIto32, mkexpr(res)) );
22621          assign( resLo, unop(Iop_64to32, mkexpr(res)) );
22622          putIRegT( rDhi, mkexpr(resHi), condT );
22623          putIRegT( rDlo, mkexpr(resLo), condT );
22624          DIP("umaal r%u, r%u, r%u, r%u\n", rDlo, rDhi, rN, rM);
22625          goto decode_success;
22626       }
22627    }
22628
22629    /* ------------------- (T1) SMMUL{R} ------------------ */
22630    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
22631        && INSN0(6,4) == BITS3(1,0,1)
22632        && INSN1(15,12) == BITS4(1,1,1,1)
22633        && INSN1(7,5) == BITS3(0,0,0)) {
22634       UInt bitR = INSN1(4,4);
22635       UInt rD = INSN1(11,8);
22636       UInt rM = INSN1(3,0);
22637       UInt rN = INSN0(3,0);
22638       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22639          IRExpr* res
22640          = unop(Iop_64HIto32,
22641                 binop(Iop_Add64,
22642                       binop(Iop_MullS32, getIRegT(rN), getIRegT(rM)),
22643                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
22644          putIRegT(rD, res, condT);
22645          DIP("smmul%s r%u, r%u, r%u\n",
22646              bitR ? "r" : "", rD, rN, rM);
22647          goto decode_success;
22648       }
22649    }
22650
22651    /* ------------------- (T1) SMMLA{R} ------------------ */
22652    if (INSN0(15,7) == BITS9(1,1,1,1,1,0,1,1,0)
22653        && INSN0(6,4) == BITS3(1,0,1)
22654        && INSN1(7,5) == BITS3(0,0,0)) {
22655       UInt bitR = INSN1(4,4);
22656       UInt rA = INSN1(15,12);
22657       UInt rD = INSN1(11,8);
22658       UInt rM = INSN1(3,0);
22659       UInt rN = INSN0(3,0);
22660       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM) && (rA != 13)) {
22661          IRExpr* res
22662          = unop(Iop_64HIto32,
22663                 binop(Iop_Add64,
22664                       binop(Iop_Add64,
22665                             binop(Iop_32HLto64, getIRegT(rA), mkU32(0)),
22666                             binop(Iop_MullS32, getIRegT(rN), getIRegT(rM))),
22667                       mkU64(bitR ? 0x80000000ULL : 0ULL)));
22668          putIRegT(rD, res, condT);
22669          DIP("smmla%s r%u, r%u, r%u, r%u\n",
22670              bitR ? "r" : "", rD, rN, rM, rA);
22671          goto decode_success;
22672       }
22673    }
22674
22675    /* ------------------ (T2) ADR ------------------ */
22676    if ((INSN0(15,0) == 0xF2AF || INSN0(15,0) == 0xF6AF)
22677        && INSN1(15,15) == 0) {
22678       /* rD = align4(PC) - imm32 */
22679       UInt rD = INSN1(11,8);
22680       if (!isBadRegT(rD)) {
22681          UInt imm32 = (INSN0(10,10) << 11)
22682                       | (INSN1(14,12) << 8) | INSN1(7,0);
22683          putIRegT(rD, binop(Iop_Sub32,
22684                             binop(Iop_And32, getIRegT(15), mkU32(~3U)),
22685                             mkU32(imm32)),
22686                       condT);
22687          DIP("sub r%u, pc, #%u\n", rD, imm32);
22688          goto decode_success;
22689       }
22690    }
22691
22692    /* ------------------- (T1) BFI ------------------- */
22693    /* ------------------- (T1) BFC ------------------- */
22694    if (INSN0(15,4) == 0xF36 && INSN1(15,15) == 0 && INSN1(5,5) == 0) {
22695       UInt rD  = INSN1(11,8);
22696       UInt rN  = INSN0(3,0);
22697       UInt msb = INSN1(4,0);
22698       UInt lsb = (INSN1(14,12) << 2) | INSN1(7,6);
22699       if (isBadRegT(rD) || rN == 13 || msb < lsb) {
22700          /* undecodable; fall through */
22701       } else {
22702          IRTemp src    = newTemp(Ity_I32);
22703          IRTemp olddst = newTemp(Ity_I32);
22704          IRTemp newdst = newTemp(Ity_I32);
22705          UInt   mask   = ((UInt)1) << (msb - lsb);
22706          mask = (mask - 1) + mask;
22707          vassert(mask != 0); // guaranteed by "msb < lsb" check above
22708          mask <<= lsb;
22709
22710          assign(src, rN == 15 ? mkU32(0) : getIRegT(rN));
22711          assign(olddst, getIRegT(rD));
22712          assign(newdst,
22713                 binop(Iop_Or32,
22714                    binop(Iop_And32,
22715                          binop(Iop_Shl32, mkexpr(src), mkU8(lsb)),
22716                          mkU32(mask)),
22717                    binop(Iop_And32,
22718                          mkexpr(olddst),
22719                          mkU32(~mask)))
22720                );
22721
22722          putIRegT(rD, mkexpr(newdst), condT);
22723
22724          if (rN == 15) {
22725             DIP("bfc r%u, #%u, #%u\n",
22726                 rD, lsb, msb-lsb+1);
22727          } else {
22728             DIP("bfi r%u, r%u, #%u, #%u\n",
22729                 rD, rN, lsb, msb-lsb+1);
22730          }
22731          goto decode_success;
22732       }
22733    }
22734
22735    /* ------------------- (T1) SXTAH ------------------- */
22736    /* ------------------- (T1) UXTAH ------------------- */
22737    if ((INSN0(15,4) == 0xFA1      // UXTAH
22738         || INSN0(15,4) == 0xFA0)  // SXTAH
22739        && INSN1(15,12) == BITS4(1,1,1,1)
22740        && INSN1(7,6) == BITS2(1,0)) {
22741       Bool isU = INSN0(15,4) == 0xFA1;
22742       UInt rN  = INSN0(3,0);
22743       UInt rD  = INSN1(11,8);
22744       UInt rM  = INSN1(3,0);
22745       UInt rot = INSN1(5,4);
22746       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22747          IRTemp srcL = newTemp(Ity_I32);
22748          IRTemp srcR = newTemp(Ity_I32);
22749          IRTemp res  = newTemp(Ity_I32);
22750          assign(srcR, getIRegT(rM));
22751          assign(srcL, getIRegT(rN));
22752          assign(res,  binop(Iop_Add32,
22753                             mkexpr(srcL),
22754                             unop(isU ? Iop_16Uto32 : Iop_16Sto32,
22755                                  unop(Iop_32to16,
22756                                       genROR32(srcR, 8 * rot)))));
22757          putIRegT(rD, mkexpr(res), condT);
22758          DIP("%cxtah r%u, r%u, r%u, ror #%u\n",
22759              isU ? 'u' : 's', rD, rN, rM, rot);
22760          goto decode_success;
22761       }
22762    }
22763
22764    /* ------------------- (T1) SXTAB ------------------- */
22765    /* ------------------- (T1) UXTAB ------------------- */
22766    if ((INSN0(15,4) == 0xFA5      // UXTAB
22767         || INSN0(15,4) == 0xFA4)  // SXTAB
22768        && INSN1(15,12) == BITS4(1,1,1,1)
22769        && INSN1(7,6) == BITS2(1,0)) {
22770       Bool isU = INSN0(15,4) == 0xFA5;
22771       UInt rN  = INSN0(3,0);
22772       UInt rD  = INSN1(11,8);
22773       UInt rM  = INSN1(3,0);
22774       UInt rot = INSN1(5,4);
22775       if (!isBadRegT(rD) && !isBadRegT(rN) && !isBadRegT(rM)) {
22776          IRTemp srcL = newTemp(Ity_I32);
22777          IRTemp srcR = newTemp(Ity_I32);
22778          IRTemp res  = newTemp(Ity_I32);
22779          assign(srcR, getIRegT(rM));
22780          assign(srcL, getIRegT(rN));
22781          assign(res,  binop(Iop_Add32,
22782                             mkexpr(srcL),
22783                             unop(isU ? Iop_8Uto32 : Iop_8Sto32,
22784                                  unop(Iop_32to8,
22785                                       genROR32(srcR, 8 * rot)))));
22786          putIRegT(rD, mkexpr(res), condT);
22787          DIP("%cxtab r%u, r%u, r%u, ror #%u\n",
22788              isU ? 'u' : 's', rD, rN, rM, rot);
22789          goto decode_success;
22790       }
22791    }
22792
22793    /* ------------------- (T1) CLZ ------------------- */
22794    if (INSN0(15,4) == 0xFAB
22795        && INSN1(15,12) == BITS4(1,1,1,1)
22796        && INSN1(7,4) == BITS4(1,0,0,0)) {
22797       UInt rM1 = INSN0(3,0);
22798       UInt rD  = INSN1(11,8);
22799       UInt rM2 = INSN1(3,0);
22800       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22801          IRTemp arg = newTemp(Ity_I32);
22802          IRTemp res = newTemp(Ity_I32);
22803          assign(arg, getIRegT(rM1));
22804          assign(res, IRExpr_ITE(
22805                         binop(Iop_CmpEQ32, mkexpr(arg), mkU32(0)),
22806                         mkU32(32),
22807                         unop(Iop_Clz32, mkexpr(arg))
22808          ));
22809          putIRegT(rD, mkexpr(res), condT);
22810          DIP("clz r%u, r%u\n", rD, rM1);
22811          goto decode_success;
22812       }
22813    }
22814
22815    /* ------------------- (T1) RBIT ------------------- */
22816    if (INSN0(15,4) == 0xFA9
22817        && INSN1(15,12) == BITS4(1,1,1,1)
22818        && INSN1(7,4) == BITS4(1,0,1,0)) {
22819       UInt rM1 = INSN0(3,0);
22820       UInt rD  = INSN1(11,8);
22821       UInt rM2 = INSN1(3,0);
22822       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22823          IRTemp arg = newTemp(Ity_I32);
22824          assign(arg, getIRegT(rM1));
22825          IRTemp res = gen_BITREV(arg);
22826          putIRegT(rD, mkexpr(res), condT);
22827          DIP("rbit r%u, r%u\n", rD, rM1);
22828          goto decode_success;
22829       }
22830    }
22831
22832    /* ------------------- (T2) REV   ------------------- */
22833    /* ------------------- (T2) REV16 ------------------- */
22834    if (INSN0(15,4) == 0xFA9
22835        && INSN1(15,12) == BITS4(1,1,1,1)
22836        && (   INSN1(7,4) == BITS4(1,0,0,0)     // REV
22837            || INSN1(7,4) == BITS4(1,0,0,1))) { // REV16
22838       UInt rM1   = INSN0(3,0);
22839       UInt rD    = INSN1(11,8);
22840       UInt rM2   = INSN1(3,0);
22841       Bool isREV = INSN1(7,4) == BITS4(1,0,0,0);
22842       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22843          IRTemp arg = newTemp(Ity_I32);
22844          assign(arg, getIRegT(rM1));
22845          IRTemp res = isREV ? gen_REV(arg) : gen_REV16(arg);
22846          putIRegT(rD, mkexpr(res), condT);
22847          DIP("rev%s r%u, r%u\n", isREV ? "" : "16", rD, rM1);
22848          goto decode_success;
22849       }
22850    }
22851
22852    /* ------------------- (T2) REVSH ------------------ */
22853    if (INSN0(15,4) == 0xFA9
22854        && INSN1(15,12) == BITS4(1,1,1,1)
22855        && INSN1(7,4) == BITS4(1,0,1,1)) {
22856       UInt rM1 = INSN0(3,0);
22857       UInt rM2 = INSN1(3,0);
22858       UInt rD  = INSN1(11,8);
22859       if (!isBadRegT(rD) && !isBadRegT(rM1) && rM1 == rM2) {
22860          IRTemp irt_rM  = newTemp(Ity_I32);
22861          IRTemp irt_hi  = newTemp(Ity_I32);
22862          IRTemp irt_low = newTemp(Ity_I32);
22863          IRTemp irt_res = newTemp(Ity_I32);
22864          assign(irt_rM, getIRegT(rM1));
22865          assign(irt_hi,
22866                 binop(Iop_Sar32,
22867                       binop(Iop_Shl32, mkexpr(irt_rM), mkU8(24)),
22868                       mkU8(16)
22869                 )
22870          );
22871          assign(irt_low,
22872                 binop(Iop_And32,
22873                       binop(Iop_Shr32, mkexpr(irt_rM), mkU8(8)),
22874                       mkU32(0xFF)
22875                 )
22876          );
22877          assign(irt_res,
22878                 binop(Iop_Or32, mkexpr(irt_hi), mkexpr(irt_low))
22879          );
22880          putIRegT(rD, mkexpr(irt_res), condT);
22881          DIP("revsh r%u, r%u\n", rD, rM1);
22882          goto decode_success;
22883       }
22884    }
22885
22886    /* -------------- (T1) MSR apsr, reg -------------- */
22887    if (INSN0(15,4) == 0xF38
22888        && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(9,0) == 0x000) {
22889       UInt rN          = INSN0(3,0);
22890       UInt write_ge    = INSN1(10,10);
22891       UInt write_nzcvq = INSN1(11,11);
22892       if (!isBadRegT(rN) && (write_nzcvq || write_ge)) {
22893          IRTemp rNt = newTemp(Ity_I32);
22894          assign(rNt, getIRegT(rN));
22895          desynthesise_APSR( write_nzcvq, write_ge, rNt, condT );
22896          DIP("msr cpsr_%s%s, r%u\n",
22897              write_nzcvq ? "f" : "", write_ge ? "g" : "", rN);
22898          goto decode_success;
22899       }
22900    }
22901
22902    /* -------------- (T1) MRS reg, apsr -------------- */
22903    if (INSN0(15,0) == 0xF3EF
22904        && INSN1(15,12) == BITS4(1,0,0,0) && INSN1(7,0) == 0x00) {
22905       UInt rD = INSN1(11,8);
22906       if (!isBadRegT(rD)) {
22907          IRTemp apsr = synthesise_APSR();
22908          putIRegT( rD, mkexpr(apsr), condT );
22909          DIP("mrs r%u, cpsr\n", rD);
22910          goto decode_success;
22911       }
22912    }
22913
22914    /* ----------------- (T1) LDREX ----------------- */
22915    if (INSN0(15,4) == 0xE85 && INSN1(11,8) == BITS4(1,1,1,1)) {
22916       UInt rN   = INSN0(3,0);
22917       UInt rT   = INSN1(15,12);
22918       UInt imm8 = INSN1(7,0);
22919       if (!isBadRegT(rT) && rN != 15) {
22920          IRTemp res;
22921          // go uncond
22922          mk_skip_over_T32_if_cond_is_false( condT );
22923          // now uncond
22924          res = newTemp(Ity_I32);
22925          stmt( IRStmt_LLSC(Iend_LE,
22926                            res,
22927                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
22928                            NULL/*this is a load*/ ));
22929          putIRegT(rT, mkexpr(res), IRTemp_INVALID);
22930          DIP("ldrex r%u, [r%u, #+%u]\n", rT, rN, imm8 * 4);
22931          goto decode_success;
22932       }
22933    }
22934
22935    /* --------------- (T1) LDREX{B,H} --------------- */
22936    if (INSN0(15,4) == 0xE8D
22937        && (INSN1(11,0) == 0xF4F || INSN1(11,0) == 0xF5F)) {
22938       UInt rN  = INSN0(3,0);
22939       UInt rT  = INSN1(15,12);
22940       Bool isH = INSN1(11,0) == 0xF5F;
22941       if (!isBadRegT(rT) && rN != 15) {
22942          IRTemp res;
22943          // go uncond
22944          mk_skip_over_T32_if_cond_is_false( condT );
22945          // now uncond
22946          res = newTemp(isH ? Ity_I16 : Ity_I8);
22947          stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
22948                            NULL/*this is a load*/ ));
22949          putIRegT(rT, unop(isH ? Iop_16Uto32 : Iop_8Uto32, mkexpr(res)),
22950                       IRTemp_INVALID);
22951          DIP("ldrex%c r%u, [r%u]\n", isH ? 'h' : 'b', rT, rN);
22952          goto decode_success;
22953       }
22954    }
22955
22956    /* --------------- (T1) LDREXD --------------- */
22957    if (INSN0(15,4) == 0xE8D && INSN1(7,0) == 0x7F) {
22958       UInt rN  = INSN0(3,0);
22959       UInt rT  = INSN1(15,12);
22960       UInt rT2 = INSN1(11,8);
22961       if (!isBadRegT(rT) && !isBadRegT(rT2) && rT != rT2 && rN != 15) {
22962          IRTemp res;
22963          // go uncond
22964          mk_skip_over_T32_if_cond_is_false( condT );
22965          // now uncond
22966          res = newTemp(Ity_I64);
22967          // FIXME: assumes little-endian guest
22968          stmt( IRStmt_LLSC(Iend_LE, res, getIRegT(rN),
22969                            NULL/*this is a load*/ ));
22970          // FIXME: assumes little-endian guest
22971          putIRegT(rT,  unop(Iop_64to32,   mkexpr(res)), IRTemp_INVALID);
22972          putIRegT(rT2, unop(Iop_64HIto32, mkexpr(res)), IRTemp_INVALID);
22973          DIP("ldrexd r%u, r%u, [r%u]\n", rT, rT2, rN);
22974          goto decode_success;
22975       }
22976    }
22977
22978    /* ----------------- (T1) STREX ----------------- */
22979    if (INSN0(15,4) == 0xE84) {
22980       UInt rN   = INSN0(3,0);
22981       UInt rT   = INSN1(15,12);
22982       UInt rD   = INSN1(11,8);
22983       UInt imm8 = INSN1(7,0);
22984       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
22985           && rD != rN && rD != rT) {
22986          IRTemp resSC1, resSC32;
22987          // go uncond
22988          mk_skip_over_T32_if_cond_is_false( condT );
22989          // now uncond
22990          /* Ok, now we're unconditional.  Do the store. */
22991          resSC1 = newTemp(Ity_I1);
22992          stmt( IRStmt_LLSC(Iend_LE,
22993                            resSC1,
22994                            binop(Iop_Add32, getIRegT(rN), mkU32(imm8 * 4)),
22995                            getIRegT(rT)) );
22996          /* Set rD to 1 on failure, 0 on success.  Currently we have
22997             resSC1 == 0 on failure, 1 on success. */
22998          resSC32 = newTemp(Ity_I32);
22999          assign(resSC32,
23000                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
23001          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
23002          DIP("strex r%u, r%u, [r%u, #+%u]\n", rD, rT, rN, imm8 * 4);
23003          goto decode_success;
23004       }
23005    }
23006
23007    /* --------------- (T1) STREX{B,H} --------------- */
23008    if (INSN0(15,4) == 0xE8C
23009        && (INSN1(11,4) == 0xF4 || INSN1(11,4) == 0xF5)) {
23010       UInt rN  = INSN0(3,0);
23011       UInt rT  = INSN1(15,12);
23012       UInt rD  = INSN1(3,0);
23013       Bool isH = INSN1(11,4) == 0xF5;
23014       if (!isBadRegT(rD) && !isBadRegT(rT) && rN != 15
23015           && rD != rN && rD != rT) {
23016          IRTemp resSC1, resSC32;
23017          // go uncond
23018          mk_skip_over_T32_if_cond_is_false( condT );
23019          // now uncond
23020          /* Ok, now we're unconditional.  Do the store. */
23021          resSC1 = newTemp(Ity_I1);
23022          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN),
23023                            unop(isH ? Iop_32to16 : Iop_32to8,
23024                                 getIRegT(rT))) );
23025          /* Set rD to 1 on failure, 0 on success.  Currently we have
23026             resSC1 == 0 on failure, 1 on success. */
23027          resSC32 = newTemp(Ity_I32);
23028          assign(resSC32,
23029                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
23030          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
23031          DIP("strex%c r%u, r%u, [r%u]\n", isH ? 'h' : 'b', rD, rT, rN);
23032          goto decode_success;
23033       }
23034    }
23035
23036    /* ---------------- (T1) STREXD ---------------- */
23037    if (INSN0(15,4) == 0xE8C && INSN1(7,4) == BITS4(0,1,1,1)) {
23038       UInt rN  = INSN0(3,0);
23039       UInt rT  = INSN1(15,12);
23040       UInt rT2 = INSN1(11,8);
23041       UInt rD  = INSN1(3,0);
23042       if (!isBadRegT(rD) && !isBadRegT(rT) && !isBadRegT(rT2)
23043           && rN != 15 && rD != rN && rD != rT && rD != rT2) {
23044          IRTemp resSC1, resSC32, data;
23045          // go uncond
23046          mk_skip_over_T32_if_cond_is_false( condT );
23047          // now uncond
23048          /* Ok, now we're unconditional.  Do the store. */
23049          resSC1 = newTemp(Ity_I1);
23050          data = newTemp(Ity_I64);
23051          // FIXME: assumes little-endian guest
23052          assign(data, binop(Iop_32HLto64, getIRegT(rT2), getIRegT(rT)));
23053          // FIXME: assumes little-endian guest
23054          stmt( IRStmt_LLSC(Iend_LE, resSC1, getIRegT(rN), mkexpr(data)));
23055          /* Set rD to 1 on failure, 0 on success.  Currently we have
23056             resSC1 == 0 on failure, 1 on success. */
23057          resSC32 = newTemp(Ity_I32);
23058          assign(resSC32,
23059                 unop(Iop_1Uto32, unop(Iop_Not1, mkexpr(resSC1))));
23060          putIRegT(rD, mkexpr(resSC32), IRTemp_INVALID);
23061          DIP("strexd r%u, r%u, r%u, [r%u]\n", rD, rT, rT2, rN);
23062          goto decode_success;
23063       }
23064    }
23065
23066    /* -------------- v7 barrier insns -------------- */
23067    if (INSN0(15,0) == 0xF3BF && (INSN1(15,0) & 0xFF00) == 0x8F00) {
23068       /* FIXME: should this be unconditional? */
23069       /* XXX this isn't really right, is it?  The generated IR does
23070          them unconditionally.  I guess it doesn't matter since it
23071          doesn't do any harm to do them even when the guarding
23072          condition is false -- it's just a performance loss. */
23073       switch (INSN1(7,0)) {
23074          case 0x4F: /* DSB sy */
23075          case 0x4E: /* DSB st */
23076          case 0x4B: /* DSB ish */
23077          case 0x4A: /* DSB ishst */
23078          case 0x47: /* DSB nsh */
23079          case 0x46: /* DSB nshst */
23080          case 0x43: /* DSB osh */
23081          case 0x42: /* DSB oshst */
23082             stmt( IRStmt_MBE(Imbe_Fence) );
23083             DIP("DSB\n");
23084             goto decode_success;
23085          case 0x5F: /* DMB sy */
23086          case 0x5E: /* DMB st */
23087          case 0x5B: /* DMB ish */
23088          case 0x5A: /* DMB ishst */
23089          case 0x57: /* DMB nsh */
23090          case 0x56: /* DMB nshst */
23091          case 0x53: /* DMB osh */
23092          case 0x52: /* DMB oshst */
23093             stmt( IRStmt_MBE(Imbe_Fence) );
23094             DIP("DMB\n");
23095             goto decode_success;
23096          case 0x6F: /* ISB */
23097             stmt( IRStmt_MBE(Imbe_Fence) );
23098             DIP("ISB\n");
23099             goto decode_success;
23100          default:
23101             break;
23102       }
23103    }
23104
23105    /* ---------------------- PLD{,W} ---------------------- */
23106    if ((INSN0(15,4) & 0xFFD) == 0xF89 && INSN1(15,12) == 0xF) {
23107       /* FIXME: should this be unconditional? */
23108       /* PLD/PLDW immediate, encoding T1 */
23109       UInt rN    = INSN0(3,0);
23110       UInt bW    = INSN0(5,5);
23111       UInt imm12 = INSN1(11,0);
23112       DIP("pld%s [r%u, #%u]\n", bW ? "w" : "",  rN, imm12);
23113       goto decode_success;
23114    }
23115
23116    if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,8) == 0xFC) {
23117       /* FIXME: should this be unconditional? */
23118       /* PLD/PLDW immediate, encoding T2 */
23119       UInt rN    = INSN0(3,0);
23120       UInt bW    = INSN0(5,5);
23121       UInt imm8  = INSN1(7,0);
23122       DIP("pld%s [r%u, #-%u]\n", bW ? "w" : "",  rN, imm8);
23123       goto decode_success;
23124    }
23125
23126    if ((INSN0(15,4) & 0xFFD) == 0xF81 && INSN1(15,6) == 0x3C0) {
23127       /* FIXME: should this be unconditional? */
23128       /* PLD/PLDW register, encoding T1 */
23129       UInt rN   = INSN0(3,0);
23130       UInt rM   = INSN1(3,0);
23131       UInt bW   = INSN0(5,5);
23132       UInt imm2 = INSN1(5,4);
23133       if (!isBadRegT(rM)) {
23134          DIP("pld%s [r%u, r%u, lsl %u]\n", bW ? "w" : "", rN, rM, imm2);
23135          goto decode_success;
23136       }
23137       /* fall through */
23138    }
23139
23140    /* -------------- read CP15 TPIDRURO register ------------- */
23141    /* mrc     p15, 0,  r0, c13, c0, 3  up to
23142       mrc     p15, 0, r14, c13, c0, 3
23143    */
23144    /* I don't know whether this is really v7-only.  But anyway, we
23145       have to support it since arm-linux uses TPIDRURO as a thread
23146       state register. */
23147    if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F70)) {
23148       UInt rD = INSN1(15,12);
23149       if (!isBadRegT(rD)) {
23150          putIRegT(rD, IRExpr_Get(OFFB_TPIDRURO, Ity_I32), condT);
23151          DIP("mrc p15,0, r%u, c13, c0, 3\n", rD);
23152          goto decode_success;
23153       }
23154       /* fall through */
23155    }
23156
23157    /* ------------ read/write CP15 TPIDRURW register ----------- */
23158    /* mcr     p15, 0, r0,  c13, c0, 2 (r->cr xfer)  up to
23159       mcr     p15, 0, r14, c13, c0, 2
23160
23161       mrc     p15, 0, r0,  c13, c0, 2 (rc->r xfer)  up to
23162       mrc     p15, 0, r14, c13, c0, 2
23163    */
23164    if ((INSN0(15,0) == 0xEE0D) && (INSN1(11,0) == 0x0F50)) {
23165       UInt rS = INSN1(15,12);
23166       if (!isBadRegT(rS)) {
23167          putMiscReg32(OFFB_TPIDRURW, getIRegT(rS), condT);
23168          DIP("mcr p15,0, r%u, c13, c0, 2\n", rS);
23169          goto decode_success;
23170       }
23171       /* fall through */
23172    }
23173    if ((INSN0(15,0) == 0xEE1D) && (INSN1(11,0) == 0x0F50)) {
23174       UInt rD = INSN1(15,12);
23175       if (!isBadRegT(rD)) {
23176          putIRegT(rD, IRExpr_Get(OFFB_TPIDRURW, Ity_I32), condT);
23177          DIP("mrc p15,0, r%u, c13, c0, 2\n", rD);
23178          goto decode_success;
23179       }
23180       /* fall through */
23181    }
23182
23183    /* -------------- read CP15 PMUSRENR register ------------- */
23184    /* mrc     p15, 0, r0,  c9, c14, 0  up to
23185       mrc     p15, 0, r14, c9, c14, 0
23186       See comment on the ARM equivalent of this (above) for details.
23187    */
23188    if ((INSN0(15,0) == 0xEE19) && (INSN1(11,0) == 0x0F1E)) {
23189       UInt rD = INSN1(15,12);
23190       if (!isBadRegT(rD)) {
23191          putIRegT(rD, mkU32(0), condT);
23192          DIP("mrc p15,0, r%u, c9, c14, 0\n", rD);
23193          goto decode_success;
23194       }
23195       /* fall through */
23196    }
23197
23198    /* ------------------- CLREX ------------------ */
23199    if (INSN0(15,0) == 0xF3BF && INSN1(15,0) == 0x8F2F) {
23200       /* AFAICS, this simply cancels a (all?) reservations made by a
23201          (any?) preceding LDREX(es).  Arrange to hand it through to
23202          the back end. */
23203       mk_skip_over_T32_if_cond_is_false( condT );
23204       stmt( IRStmt_MBE(Imbe_CancelReservation) );
23205       DIP("clrex\n");
23206       goto decode_success;
23207    }
23208
23209    /* ------------------- NOP ------------------ */
23210    if (INSN0(15,0) == 0xF3AF && INSN1(15,0) == 0x8000) {
23211       DIP("nop\n");
23212       goto decode_success;
23213    }
23214
23215    /* -------------- (T1) LDRT reg+#imm8 -------------- */
23216    /* Load Register Unprivileged:
23217       ldrt Rt, [Rn, #imm8]
23218    */
23219    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,1)
23220        && INSN1(11,8) == BITS4(1,1,1,0)) {
23221       UInt rT    = INSN1(15,12);
23222       UInt rN    = INSN0(3,0);
23223       UInt imm8  = INSN1(7,0);
23224       Bool valid = True;
23225       if (rN == 15 || isBadRegT(rT)) valid = False;
23226       if (valid) {
23227          put_ITSTATE(old_itstate);
23228          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23229          IRTemp newRt = newTemp(Ity_I32);
23230          loadGuardedLE( newRt, ILGop_Ident32, ea, llGetIReg(rT), condT );
23231          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23232          put_ITSTATE(new_itstate);
23233          DIP("ldrt r%u, [r%u, #%u]\n", rT, rN, imm8);
23234          goto decode_success;
23235       }
23236    }
23237
23238    /* -------------- (T1) STRT reg+#imm8 -------------- */
23239    /* Store Register Unprivileged:
23240       strt Rt, [Rn, #imm8]
23241    */
23242    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,1) && INSN0(5,4) == BITS2(0,0)
23243        && INSN1(11,8) == BITS4(1,1,1,0)) {
23244       UInt rT    = INSN1(15,12);
23245       UInt rN    = INSN0(3,0);
23246       UInt imm8  = INSN1(7,0);
23247       Bool valid = True;
23248       if (rN == 15 || isBadRegT(rT)) valid = False;
23249       if (valid) {
23250          put_ITSTATE(old_itstate);
23251          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23252          storeGuardedLE( address, llGetIReg(rT), condT );
23253          put_ITSTATE(new_itstate);
23254          DIP("strt r%u, [r%u, #%u]\n", rT, rN, imm8);
23255          goto decode_success;
23256       }
23257    }
23258
23259    /* -------------- (T1) STRBT reg+#imm8 -------------- */
23260    /* Store Register Byte Unprivileged:
23261       strbt Rt, [Rn, #imm8]
23262    */
23263    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,0)
23264        && INSN1(11,8) == BITS4(1,1,1,0)) {
23265       UInt rT    = INSN1(15,12);
23266       UInt rN    = INSN0(3,0);
23267       UInt imm8  = INSN1(7,0);
23268       Bool valid = True;
23269       if (rN == 15 || isBadRegT(rT)) valid = False;
23270       if (valid) {
23271          put_ITSTATE(old_itstate);
23272          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23273          IRExpr* data = unop(Iop_32to8, llGetIReg(rT));
23274          storeGuardedLE( address, data, condT );
23275          put_ITSTATE(new_itstate);
23276          DIP("strbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23277          goto decode_success;
23278       }
23279    }
23280
23281    /* -------------- (T1) LDRHT reg+#imm8 -------------- */
23282    /* Load Register Halfword Unprivileged:
23283       ldrht Rt, [Rn, #imm8]
23284    */
23285    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,1)
23286        && INSN1(11,8) == BITS4(1,1,1,0)) {
23287       UInt rN    = INSN0(3,0);
23288       Bool valid = True;
23289       if (rN == 15) {
23290          /* In this case our instruction is LDRH (literal), in fact:
23291             LDRH (literal) was realized earlier, so we don't want to
23292             make it twice. */
23293          valid = False;
23294       }
23295       UInt rT    = INSN1(15,12);
23296       UInt imm8  = INSN1(7,0);
23297       if (isBadRegT(rT)) valid = False;
23298       if (valid) {
23299          put_ITSTATE(old_itstate);
23300          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23301          IRTemp newRt = newTemp(Ity_I32);
23302          loadGuardedLE( newRt, ILGop_16Uto32, ea, llGetIReg(rT), condT );
23303          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23304          put_ITSTATE(new_itstate);
23305          DIP("ldrht r%u, [r%u, #%u]\n", rT, rN, imm8);
23306          goto decode_success;
23307       }
23308    }
23309
23310    /* -------------- (T1) LDRSHT reg+#imm8 -------------- */
23311    /* Load Register Signed Halfword Unprivileged:
23312       ldrsht Rt, [Rn, #imm8]
23313    */
23314    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(1,1)
23315        && INSN1(11,8) == BITS4(1,1,1,0)) {
23316       UInt rN    = INSN0(3,0);
23317       Bool valid = True;
23318       if (rN == 15) {
23319          /* In this case our instruction is LDRSH (literal), in fact:
23320             LDRSH (literal) was realized earlier, so we don't want to
23321             make it twice. */
23322          valid = False;
23323       }
23324       UInt rT    = INSN1(15,12);
23325       UInt imm8  = INSN1(7,0);
23326       if (isBadRegT(rT)) valid = False;
23327       if (valid) {
23328          put_ITSTATE(old_itstate);
23329          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23330          IRTemp newRt = newTemp(Ity_I32);
23331          loadGuardedLE( newRt, ILGop_16Sto32, ea, llGetIReg(rT), condT );
23332          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23333          put_ITSTATE(new_itstate);
23334          DIP("ldrsht r%u, [r%u, #%u]\n", rT, rN, imm8);
23335          goto decode_success;
23336       }
23337    }
23338
23339    /* -------------- (T1) STRHT reg+#imm8 -------------- */
23340    /* Store Register Halfword Unprivileged:
23341       strht Rt, [Rn, #imm8]
23342    */
23343    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(1,0)
23344        && INSN1(11,8) == BITS4(1,1,1,0)) {
23345       UInt rT    = INSN1(15,12);
23346       UInt rN    = INSN0(3,0);
23347       UInt imm8  = INSN1(7,0);
23348       Bool valid = True;
23349       if (rN == 15 || isBadRegT(rT)) valid = False;
23350       if (valid) {
23351          put_ITSTATE(old_itstate);
23352          IRExpr* address = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23353          IRExpr* data = unop(Iop_32to16, llGetIReg(rT));
23354          storeGuardedLE( address, data, condT );
23355          put_ITSTATE(new_itstate);
23356          DIP("strht r%u, [r%u, #%u]\n", rT, rN, imm8);
23357          goto decode_success;
23358       }
23359    }
23360
23361    /* -------------- (T1) LDRBT reg+#imm8 -------------- */
23362    /* Load Register Byte Unprivileged:
23363       ldrbt Rt, [Rn, #imm8]
23364    */
23365    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,0,0,0) && INSN0(5,4) == BITS2(0,1)
23366        && INSN1(11,8) == BITS4(1,1,1,0)) {
23367       UInt rN    = INSN0(3,0);
23368       UInt rT    = INSN1(15,12);
23369       UInt imm8  = INSN1(7,0);
23370       Bool valid = True;
23371       if (rN == 15 /* insn is LDRB (literal) */) valid = False;
23372       if (isBadRegT(rT)) valid = False;
23373       if (valid) {
23374          put_ITSTATE(old_itstate);
23375          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23376          IRTemp newRt = newTemp(Ity_I32);
23377          loadGuardedLE( newRt, ILGop_8Uto32, ea, llGetIReg(rT), condT );
23378          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23379          put_ITSTATE(new_itstate);
23380          DIP("ldrbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23381          goto decode_success;
23382       }
23383    }
23384
23385    /* -------------- (T1) LDRSBT reg+#imm8 -------------- */
23386    /* Load Register Signed Byte Unprivileged:
23387       ldrsbt Rt, [Rn, #imm8]
23388    */
23389    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
23390        && INSN1(11,8) == BITS4(1,1,1,0)) {
23391       UInt rN    = INSN0(3,0);
23392       Bool valid = True;
23393       UInt rT    = INSN1(15,12);
23394       UInt imm8  = INSN1(7,0);
23395       if (rN == 15 /* insn is LDRSB (literal) */) valid = False;
23396       if (isBadRegT(rT)) valid = False;
23397       if (valid) {
23398          put_ITSTATE(old_itstate);
23399          IRExpr* ea = binop(Iop_Add32, getIRegT(rN), mkU32(imm8));
23400          IRTemp newRt = newTemp(Ity_I32);
23401          loadGuardedLE( newRt, ILGop_8Sto32, ea, llGetIReg(rT), condT );
23402          putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
23403          put_ITSTATE(new_itstate);
23404          DIP("ldrsbt r%u, [r%u, #%u]\n", rT, rN, imm8);
23405          goto decode_success;
23406       }
23407    }
23408
23409    /* -------------- (T1) PLI reg+#imm12 -------------- */
23410    /* Preload Instruction:
23411       pli [Rn, #imm12]
23412    */
23413    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,1,0) && INSN0(5,4) == BITS2(0,1)
23414        && INSN1(15,12) == BITS4(1,1,1,1)) {
23415       UInt rN    = INSN0(3,0);
23416       UInt imm12 = INSN1(11,0);
23417       if (rN != 15) {
23418          DIP("pli [r%u, #%u]\n", rN, imm12);
23419          goto decode_success;
23420       }
23421    }
23422
23423    /* -------------- (T2) PLI reg-#imm8 -------------- */
23424    /* Preload Instruction:
23425       pli [Rn, #-imm8]
23426    */
23427    if (INSN0(15,6) == BITS10(1,1,1,1,1,0,0,1,0,0) && INSN0(5,4) == BITS2(0,1)
23428        && INSN1(15,8) == BITS8(1,1,1,1,1,1,0,0)) {
23429       UInt rN   = INSN0(3,0);
23430       UInt imm8 = INSN1(7,0);
23431       if (rN != 15) {
23432          DIP("pli [r%u, #-%u]\n", rN, imm8);
23433          goto decode_success;
23434       }
23435    }
23436
23437    /* -------------- (T3) PLI PC+/-#imm12 -------------- */
23438    /* Preload Instruction:
23439       pli [PC, #+/-imm12]
23440    */
23441    if (INSN0(15,8) == BITS8(1,1,1,1,1,0,0,1)
23442        && INSN0(6,0) == BITS7(0,0,1,1,1,1,1)
23443        && INSN1(15,12) == BITS4(1,1,1,1)) {
23444       UInt imm12 = INSN1(11,0);
23445       UInt bU    = INSN0(7,7);
23446       DIP("pli [pc, #%c%u]\n", bU == 1 ? '+' : '-', imm12);
23447       goto decode_success;
23448    }
23449
23450    /* ----------------------------------------------------------- */
23451    /* -- VFP (CP 10, CP 11) instructions (in Thumb mode)       -- */
23452    /* ----------------------------------------------------------- */
23453
23454    if (INSN0(15,12) == BITS4(1,1,1,0)) {
23455       UInt insn28 = (INSN0(11,0) << 16) | INSN1(15,0);
23456       Bool ok_vfp = decode_CP10_CP11_instruction (
23457                        &dres, insn28, condT, ARMCondAL/*bogus*/,
23458                        True/*isT*/
23459                     );
23460       if (ok_vfp)
23461          goto decode_success;
23462    }
23463
23464    /* ----------------------------------------------------------- */
23465    /* -- NEON instructions (only v7 and below, in Thumb mode)  -- */
23466    /* ----------------------------------------------------------- */
23467
23468    if (archinfo->hwcaps & VEX_HWCAPS_ARM_NEON) {
23469       UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23470       Bool ok_neon = decode_NEON_instruction_ARMv7_and_below(
23471                         &dres, insn32, condT, True/*isT*/
23472                      );
23473       if (ok_neon)
23474          goto decode_success;
23475    }
23476
23477    /* ----------------------------------------------------------- */
23478    /* -- v6 media instructions (in Thumb mode)                 -- */
23479    /* ----------------------------------------------------------- */
23480
23481    { UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23482      Bool ok_v6m = decode_V6MEDIA_instruction(
23483                       &dres, insn32, condT, ARMCondAL/*bogus*/,
23484                       True/*isT*/
23485                    );
23486      if (ok_v6m)
23487         goto decode_success;
23488    }
23489
23490    /* ----------------------------------------------------------- */
23491    /* -- v8 instructions (in Thumb mode)                       -- */
23492    /* ----------------------------------------------------------- */
23493
23494    /* If we get here, it means that all attempts to decode the
23495       instruction as ARMv7 or earlier have failed.  So, if we're doing
23496       ARMv8 or later, here is the point to try for it. */
23497
23498    if (VEX_ARM_ARCHLEVEL(archinfo->hwcaps) >= 8) {
23499       UInt insn32 = (INSN0(15,0) << 16) | INSN1(15,0);
23500       Bool ok_v8
23501          = decode_V8_instruction( &dres, insn32, condT, True/*isT*/,
23502                                   old_itstate, new_itstate );
23503       if (ok_v8)
23504          goto decode_success;
23505    }
23506
23507    /* ----------------------------------------------------------- */
23508    /* -- Undecodable                                           -- */
23509    /* ----------------------------------------------------------- */
23510
23511    goto decode_failure;
23512    /*NOTREACHED*/
23513
23514   decode_failure:
23515    /* All decode failures end up here. */
23516    if (sigill_diag)
23517       vex_printf("disInstr(thumb): unhandled instruction: "
23518                  "0x%04x 0x%04x\n", (UInt)insn0, (UInt)insn1);
23519
23520    /* Back up ITSTATE to the initial value for this instruction.
23521       If we don't do that, any subsequent restart of the instruction
23522       will restart with the wrong value. */
23523    if (old_itstate != IRTemp_INVALID)
23524       put_ITSTATE(old_itstate);
23525
23526    /* Tell the dispatcher that this insn cannot be decoded, and so has
23527       not been executed, and (is currently) the next to be executed.
23528       R15 should be up-to-date since it made so at the start of each
23529       insn, but nevertheless be paranoid and update it again right
23530       now. */
23531    vassert(0 == (guest_R15_curr_instr_notENC & 1));
23532    llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
23533    dres.len         = 0;
23534    dres.whatNext    = Dis_StopHere;
23535    dres.jk_StopHere = Ijk_NoDecode;
23536    dres.continueAt  = 0;
23537    return dres;
23538
23539   decode_success:
23540    /* All decode successes end up here. */
23541    vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
23542    switch (dres.whatNext) {
23543       case Dis_Continue:
23544          llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
23545          break;
23546       case Dis_ResteerU:
23547       case Dis_ResteerC:
23548          llPutIReg(15, mkU32(dres.continueAt));
23549          break;
23550       case Dis_StopHere:
23551          break;
23552       default:
23553          vassert(0);
23554    }
23555
23556    DIP("\n");
23557
23558    return dres;
23559
23560 #  undef INSN0
23561 #  undef INSN1
23562 }
23563
23564 #undef DIP
23565 #undef DIS
23566
23567
23568 /* Helper table for figuring out how many insns an IT insn
23569    conditionalises.
23570
23571    An ITxyz instruction of the format "1011 1111 firstcond mask"
23572    conditionalises some number of instructions, as indicated by the
23573    following table.  A value of zero indicates the instruction is
23574    invalid in some way.
23575
23576    mask = 0 means this isn't an IT instruction
23577    fc = 15 (NV) means unpredictable
23578
23579    The line fc = 14 (AL) is different from the others; there are
23580    additional constraints in this case.
23581
23582           mask(0 ..                   15)
23583         +--------------------------------
23584    fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23585    ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23586         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23587         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23588         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23589         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23590         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23591         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23592         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23593         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23594         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23595         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23596         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23597         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23598         | 0 4 3 0 2 0 0 0 1 0 0 0 0 0 0 0
23599    15)  | 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
23600
23601    To be conservative with the analysis, let's rule out the mask = 0
23602    case, since that isn't an IT insn at all.  But for all the other
23603    cases where the table contains zero, that means unpredictable, so
23604    let's say 4 to be conservative.  Hence we have a safe value for any
23605    IT (mask,fc) pair that the CPU would actually identify as an IT
23606    instruction.  The final table is
23607
23608           mask(0 ..                   15)
23609         +--------------------------------
23610    fc(0 | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23611    ..   | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23612         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23613         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23614         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23615         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23616         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23617         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23618         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23619         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23620         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23621         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23622         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23623         | 0 4 3 4 2 4 3 4 1 4 3 4 2 4 3 4
23624         | 0 4 3 4 2 4 4 4 1 4 4 4 4 4 4 4
23625    15)  | 0 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
23626 */
23627 static const UChar it_length_table[256]
23628    = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23629        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23630        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23631        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23632        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23633        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23634        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23635        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23636        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23637        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23638        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23639        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23640        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23641        0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4,
23642        0, 4, 3, 4, 2, 4, 4, 4, 1, 4, 4, 4, 4, 4, 4, 4,
23643        0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
23644      };
23645
23646
23647 /*------------------------------------------------------------*/
23648 /*--- Top-level fn                                         ---*/
23649 /*------------------------------------------------------------*/
23650
23651 /* Disassemble a single instruction into IR.  The instruction
23652    is located in host memory at &guest_code[delta]. */
23653
23654 DisResult disInstr_ARM ( IRSB*        irsb_IN,
23655                          Bool         (*resteerOkFn) ( void*, Addr ),
23656                          Bool         resteerCisOk,
23657                          void*        callback_opaque,
23658                          const UChar* guest_code_IN,
23659                          Long         delta_ENCODED,
23660                          Addr         guest_IP_ENCODED,
23661                          VexArch      guest_arch,
23662                          const VexArchInfo* archinfo,
23663                          const VexAbiInfo*  abiinfo,
23664                          VexEndness   host_endness_IN,
23665                          Bool         sigill_diag_IN )
23666 {
23667    DisResult dres;
23668    Bool isThumb = (Bool)(guest_IP_ENCODED & 1);
23669
23670    /* Set globals (see top of this file) */
23671    vassert(guest_arch == VexArchARM);
23672
23673    irsb            = irsb_IN;
23674    host_endness    = host_endness_IN;
23675    __curr_is_Thumb = isThumb;
23676
23677    if (isThumb) {
23678       guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED - 1;
23679    } else {
23680       guest_R15_curr_instr_notENC = (Addr32)guest_IP_ENCODED;
23681    }
23682
23683    if (isThumb) {
23684       dres = disInstr_THUMB_WRK ( resteerOkFn,
23685                                   resteerCisOk, callback_opaque,
23686                                   &guest_code_IN[delta_ENCODED - 1],
23687                                   archinfo, abiinfo, sigill_diag_IN );
23688    } else {
23689       dres = disInstr_ARM_WRK ( resteerOkFn,
23690                                 resteerCisOk, callback_opaque,
23691                                 &guest_code_IN[delta_ENCODED],
23692                                 archinfo, abiinfo, sigill_diag_IN );
23693    }
23694
23695    return dres;
23696 }
23697
23698 /* Test program for the conversion of IRCmpF64Result values to VFP
23699    nzcv values.  See handling of FCMPD et al above. */
23700 /*
23701 UInt foo ( UInt x )
23702 {
23703    UInt ix    = ((x >> 5) & 3) | (x & 1);
23704    UInt termL = (((((ix ^ 1) << 30) - 1) >> 29) + 1);
23705    UInt termR = (ix & (ix >> 1) & 1);
23706    return termL  -  termR;
23707 }
23708
23709 void try ( char* s, UInt ir, UInt req )
23710 {
23711    UInt act = foo(ir);
23712    printf("%s 0x%02x -> req %d%d%d%d act %d%d%d%d (0x%x)\n",
23713           s, ir, (req >> 3) & 1, (req >> 2) & 1,
23714                  (req >> 1) & 1, (req >> 0) & 1,
23715                  (act >> 3) & 1, (act >> 2) & 1,
23716                  (act >> 1) & 1, (act >> 0) & 1, act);
23717
23718 }
23719
23720 int main ( void )
23721 {
23722    printf("\n");
23723    try("UN", 0x45, 0b0011);
23724    try("LT", 0x01, 0b1000);
23725    try("GT", 0x00, 0b0010);
23726    try("EQ", 0x40, 0b0110);
23727    printf("\n");
23728    return 0;
23729 }
23730 */
23731
23732 /* Spare code for doing reference implementations of various 64-bit
23733    SIMD interleaves/deinterleaves/concatenation ops. */
23734 /*
23735 // Split a 64 bit value into 4 16 bit ones, in 32-bit IRTemps with
23736 // the top halves guaranteed to be zero.
23737 static void break64to16s ( IRTemp* out3, IRTemp* out2, IRTemp* out1,
23738                            IRTemp* out0, IRTemp v64 )
23739 {
23740   if (out3) *out3 = newTemp(Ity_I32);
23741   if (out2) *out2 = newTemp(Ity_I32);
23742   if (out1) *out1 = newTemp(Ity_I32);
23743   if (out0) *out0 = newTemp(Ity_I32);
23744   IRTemp hi32 = newTemp(Ity_I32);
23745   IRTemp lo32 = newTemp(Ity_I32);
23746   assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
23747   assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
23748   if (out3) assign(*out3, binop(Iop_Shr32, mkexpr(hi32), mkU8(16)));
23749   if (out2) assign(*out2, binop(Iop_And32, mkexpr(hi32), mkU32(0xFFFF)));
23750   if (out1) assign(*out1, binop(Iop_Shr32, mkexpr(lo32), mkU8(16)));
23751   if (out0) assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFFFF)));
23752 }
23753
23754 // Make a 64 bit value from 4 16 bit ones, each of which is in a 32 bit
23755 // IRTemp.
23756 static IRTemp mk64from16s ( IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
23757 {
23758   IRTemp hi32 = newTemp(Ity_I32);
23759   IRTemp lo32 = newTemp(Ity_I32);
23760   assign(hi32,
23761          binop(Iop_Or32,
23762                binop(Iop_Shl32, mkexpr(in3), mkU8(16)),
23763                binop(Iop_And32, mkexpr(in2), mkU32(0xFFFF))));
23764   assign(lo32,
23765          binop(Iop_Or32,
23766                binop(Iop_Shl32, mkexpr(in1), mkU8(16)),
23767                binop(Iop_And32, mkexpr(in0), mkU32(0xFFFF))));
23768   IRTemp res = newTemp(Ity_I64);
23769   assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
23770   return res;
23771 }
23772
23773 static IRExpr* mk_InterleaveLO16x4 ( IRTemp a3210, IRTemp b3210 )
23774 {
23775   // returns a1 b1 a0 b0
23776   IRTemp a1, a0, b1, b0;
23777   break64to16s(NULL, NULL, &a1, &a0, a3210);
23778   break64to16s(NULL, NULL, &b1, &b0, b3210);
23779   return mkexpr(mk64from16s(a1, b1, a0, b0));
23780 }
23781
23782 static IRExpr* mk_InterleaveHI16x4 ( IRTemp a3210, IRTemp b3210 )
23783 {
23784   // returns a3 b3 a2 b2
23785   IRTemp a3, a2, b3, b2;
23786   break64to16s(&a3, &a2, NULL, NULL, a3210);
23787   break64to16s(&b3, &b2, NULL, NULL, b3210);
23788   return mkexpr(mk64from16s(a3, b3, a2, b2));
23789 }
23790
23791 static IRExpr* mk_CatEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23792 {
23793   // returns a2 a0 b2 b0
23794   IRTemp a2, a0, b2, b0;
23795   break64to16s(NULL, &a2, NULL, &a0, a3210);
23796   break64to16s(NULL, &b2, NULL, &b0, b3210);
23797   return mkexpr(mk64from16s(a2, a0, b2, b0));
23798 }
23799
23800 static IRExpr* mk_CatOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23801 {
23802   // returns a3 a1 b3 b1
23803   IRTemp a3, a1, b3, b1;
23804   break64to16s(&a3, NULL, &a1, NULL, a3210);
23805   break64to16s(&b3, NULL, &b1, NULL, b3210);
23806   return mkexpr(mk64from16s(a3, a1, b3, b1));
23807 }
23808
23809 static IRExpr* mk_InterleaveOddLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23810 {
23811   // returns a3 b3 a1 b1
23812   IRTemp a3, b3, a1, b1;
23813   break64to16s(&a3, NULL, &a1, NULL, a3210);
23814   break64to16s(&b3, NULL, &b1, NULL, b3210);
23815   return mkexpr(mk64from16s(a3, b3, a1, b1));
23816 }
23817
23818 static IRExpr* mk_InterleaveEvenLanes16x4 ( IRTemp a3210, IRTemp b3210 )
23819 {
23820   // returns a2 b2 a0 b0
23821   IRTemp a2, b2, a0, b0;
23822   break64to16s(NULL, &a2, NULL, &a0, a3210);
23823   break64to16s(NULL, &b2, NULL, &b0, b3210);
23824   return mkexpr(mk64from16s(a2, b2, a0, b0));
23825 }
23826
23827 static void break64to8s ( IRTemp* out7, IRTemp* out6, IRTemp* out5,
23828                           IRTemp* out4, IRTemp* out3, IRTemp* out2,
23829                           IRTemp* out1,IRTemp* out0, IRTemp v64 )
23830 {
23831   if (out7) *out7 = newTemp(Ity_I32);
23832   if (out6) *out6 = newTemp(Ity_I32);
23833   if (out5) *out5 = newTemp(Ity_I32);
23834   if (out4) *out4 = newTemp(Ity_I32);
23835   if (out3) *out3 = newTemp(Ity_I32);
23836   if (out2) *out2 = newTemp(Ity_I32);
23837   if (out1) *out1 = newTemp(Ity_I32);
23838   if (out0) *out0 = newTemp(Ity_I32);
23839   IRTemp hi32 = newTemp(Ity_I32);
23840   IRTemp lo32 = newTemp(Ity_I32);
23841   assign(hi32, unop(Iop_64HIto32, mkexpr(v64)) );
23842   assign(lo32, unop(Iop_64to32, mkexpr(v64)) );
23843   if (out7)
23844     assign(*out7, binop(Iop_And32,
23845                         binop(Iop_Shr32, mkexpr(hi32), mkU8(24)),
23846                         mkU32(0xFF)));
23847   if (out6)
23848     assign(*out6, binop(Iop_And32,
23849                         binop(Iop_Shr32, mkexpr(hi32), mkU8(16)),
23850                         mkU32(0xFF)));
23851   if (out5)
23852     assign(*out5, binop(Iop_And32,
23853                         binop(Iop_Shr32, mkexpr(hi32), mkU8(8)),
23854                         mkU32(0xFF)));
23855   if (out4)
23856     assign(*out4, binop(Iop_And32, mkexpr(hi32), mkU32(0xFF)));
23857   if (out3)
23858     assign(*out3, binop(Iop_And32,
23859                         binop(Iop_Shr32, mkexpr(lo32), mkU8(24)),
23860                         mkU32(0xFF)));
23861   if (out2)
23862     assign(*out2, binop(Iop_And32,
23863                         binop(Iop_Shr32, mkexpr(lo32), mkU8(16)),
23864                         mkU32(0xFF)));
23865   if (out1)
23866     assign(*out1, binop(Iop_And32,
23867                         binop(Iop_Shr32, mkexpr(lo32), mkU8(8)),
23868                         mkU32(0xFF)));
23869   if (out0)
23870     assign(*out0, binop(Iop_And32, mkexpr(lo32), mkU32(0xFF)));
23871 }
23872
23873 static IRTemp mk64from8s ( IRTemp in7, IRTemp in6, IRTemp in5, IRTemp in4,
23874                            IRTemp in3, IRTemp in2, IRTemp in1, IRTemp in0 )
23875 {
23876   IRTemp hi32 = newTemp(Ity_I32);
23877   IRTemp lo32 = newTemp(Ity_I32);
23878   assign(hi32,
23879          binop(Iop_Or32,
23880                binop(Iop_Or32,
23881                      binop(Iop_Shl32,
23882                            binop(Iop_And32, mkexpr(in7), mkU32(0xFF)),
23883                            mkU8(24)),
23884                      binop(Iop_Shl32,
23885                            binop(Iop_And32, mkexpr(in6), mkU32(0xFF)),
23886                            mkU8(16))),
23887                binop(Iop_Or32,
23888                      binop(Iop_Shl32,
23889                            binop(Iop_And32, mkexpr(in5), mkU32(0xFF)), mkU8(8)),
23890                      binop(Iop_And32,
23891                            mkexpr(in4), mkU32(0xFF)))));
23892   assign(lo32,
23893          binop(Iop_Or32,
23894                binop(Iop_Or32,
23895                      binop(Iop_Shl32,
23896                            binop(Iop_And32, mkexpr(in3), mkU32(0xFF)),
23897                            mkU8(24)),
23898                      binop(Iop_Shl32,
23899                            binop(Iop_And32, mkexpr(in2), mkU32(0xFF)),
23900                            mkU8(16))),
23901                binop(Iop_Or32,
23902                      binop(Iop_Shl32,
23903                            binop(Iop_And32, mkexpr(in1), mkU32(0xFF)), mkU8(8)),
23904                      binop(Iop_And32,
23905                            mkexpr(in0), mkU32(0xFF)))));
23906   IRTemp res = newTemp(Ity_I64);
23907   assign(res, binop(Iop_32HLto64, mkexpr(hi32), mkexpr(lo32)));
23908   return res;
23909 }
23910
23911 static IRExpr* mk_InterleaveLO8x8 ( IRTemp a76543210, IRTemp b76543210 )
23912 {
23913   // returns a3 b3 a2 b2 a1 b1 a0 b0
23914   IRTemp a3, b3, a2, b2, a1, a0, b1, b0;
23915   break64to8s(NULL, NULL, NULL, NULL, &a3, &a2, &a1, &a0, a76543210);
23916   break64to8s(NULL, NULL, NULL, NULL, &b3, &b2, &b1, &b0, b76543210);
23917   return mkexpr(mk64from8s(a3, b3, a2, b2, a1, b1, a0, b0));
23918 }
23919
23920 static IRExpr* mk_InterleaveHI8x8 ( IRTemp a76543210, IRTemp b76543210 )
23921 {
23922   // returns a7 b7 a6 b6 a5 b5 a4 b4
23923   IRTemp a7, b7, a6, b6, a5, b5, a4, b4;
23924   break64to8s(&a7, &a6, &a5, &a4, NULL, NULL, NULL, NULL, a76543210);
23925   break64to8s(&b7, &b6, &b5, &b4, NULL, NULL, NULL, NULL, b76543210);
23926   return mkexpr(mk64from8s(a7, b7, a6, b6, a5, b5, a4, b4));
23927 }
23928
23929 static IRExpr* mk_CatEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23930 {
23931   // returns a6 a4 a2 a0 b6 b4 b2 b0
23932   IRTemp a6, a4, a2, a0, b6, b4, b2, b0;
23933   break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
23934   break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
23935   return mkexpr(mk64from8s(a6, a4, a2, a0, b6, b4, b2, b0));
23936 }
23937
23938 static IRExpr* mk_CatOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23939 {
23940   // returns a7 a5 a3 a1 b7 b5 b3 b1
23941   IRTemp a7, a5, a3, a1, b7, b5, b3, b1;
23942   break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
23943   break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
23944   return mkexpr(mk64from8s(a7, a5, a3, a1, b7, b5, b3, b1));
23945 }
23946
23947 static IRExpr* mk_InterleaveEvenLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23948 {
23949   // returns a6 b6 a4 b4 a2 b2 a0 b0
23950   IRTemp a6, b6, a4, b4, a2, b2, a0, b0;
23951   break64to8s(NULL, &a6, NULL, &a4, NULL, &a2, NULL, &a0, a76543210);
23952   break64to8s(NULL, &b6, NULL, &b4, NULL, &b2, NULL, &b0, b76543210);
23953   return mkexpr(mk64from8s(a6, b6, a4, b4, a2, b2, a0, b0));
23954 }
23955
23956 static IRExpr* mk_InterleaveOddLanes8x8 ( IRTemp a76543210, IRTemp b76543210 )
23957 {
23958   // returns a7 b7 a5 b5 a3 b3 a1 b1
23959   IRTemp a7, b7, a5, b5, a3, b3, a1, b1;
23960   break64to8s(&a7, NULL, &a5, NULL, &a3, NULL, &a1, NULL, a76543210);
23961   break64to8s(&b7, NULL, &b5, NULL, &b3, NULL, &b1, NULL, b76543210);
23962   return mkexpr(mk64from8s(a7, b7, a5, b5, a3, b3, a1, b1));
23963 }
23964
23965 static IRExpr* mk_InterleaveLO32x2 ( IRTemp a10, IRTemp b10 )
23966 {
23967   // returns a0 b0
23968   return binop(Iop_32HLto64, unop(Iop_64to32, mkexpr(a10)),
23969                              unop(Iop_64to32, mkexpr(b10)));
23970 }
23971
23972 static IRExpr* mk_InterleaveHI32x2 ( IRTemp a10, IRTemp b10 )
23973 {
23974   // returns a1 b1
23975   return binop(Iop_32HLto64, unop(Iop_64HIto32, mkexpr(a10)),
23976                              unop(Iop_64HIto32, mkexpr(b10)));
23977 }
23978 */
23979
23980 /*--------------------------------------------------------------------*/
23981 /*--- end                                         guest_arm_toIR.c ---*/
23982 /*--------------------------------------------------------------------*/