target/arm: Remove unused GEN_NEON_INTEGER_OP macro
[qemu/ar7.git] / target/arm/translate.c
1 /*
2 * ARM translation
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include "qemu/osdep.h"
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
37 #include "trace-tcg.h"
38 #include "exec/log.h"
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
54 #include "translate.h"
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
69 #include "exec/gen-icount.h"
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
78 /* Function prototypes for gen_ functions for fixed point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
84 int i;
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
101 a64_translate_init();
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
131 return;
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
139 return;
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
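/*
 * For illustration: a typical load/store emitter would pack the transfer
 * register into the low ISS bits and OR in the flags it needs, along the
 * lines of
 *
 *     disas_set_da_iss(s, MO_UW, rt | ISSIsWrite | ISSIs16Bit);
 *
 * for a hypothetical 16-bit store of register rt, or pass ISSInvalid when
 * no valid syndrome can be reported (e.g. for a writeback form).
 */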
147 static inline int get_a32_user_mem_index(DisasContext *s)
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
182 static inline TCGv_i32 load_cpu_offset(int offset)
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
203 return s->pc_curr + (s->thumb ? 4 : 8);
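/*
 * Worked example: for an ARM-state insn at 0x00001000 read_pc() returns
 * 0x00001008, and for a Thumb insn at the same address it returns
 * 0x00001004, matching the architectural rule that reads of the PC see
 * the current insn address plus 8 (ARM) or plus 4 (Thumb).
 */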
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
231 TCGv_i32 tmp = tcg_temp_new_i32();
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
238 return tmp;
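/*
 * Worked example: for a Thumb literal load such as "LDR r0, [pc, #8]" at
 * address 0x1002, read_pc() gives 0x1006, Align(PC, 4) gives 0x1004, and
 * add_reg_for_lit(s, 15, 8) therefore yields 0x100c.
 */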
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
271 #endif
272 store_reg(s, 13, var);
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
294 static void gen_exception_internal(int excp)
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
303 static void gen_step_complete_exception(DisasContext *s)
305 /* We have just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
319 static void gen_singlestep_exception(DisasContext *s)
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
332 static inline bool is_singlestepping(DisasContext *s)
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
340 return s->base.singlestep_enabled || s->ss_active;
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
385 TCGv_i64 ret;
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
396 return ret;
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
403 TCGv_i64 ret;
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
414 return ret;
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
420 tcg_gen_rotri_i32(var, var, 16);
423 /* Dual 16-bit add. Result placed in dest. For each halfword this computes:
424 tmp = (t0 ^ t1) & 0x8000;
425 t0 &= ~0x8000;
426 t1 &= ~0x8000;
427 t0 = (t0 + t1) ^ tmp;
430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
432 TCGv_i32 tmp = tcg_temp_new_i32();
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andi_i32(tmp, tmp, 0x8000);
435 tcg_gen_andi_i32(t0, t0, ~0x8000);
436 tcg_gen_andi_i32(t1, t1, ~0x8000);
437 tcg_gen_add_i32(t0, t0, t1);
438 tcg_gen_xor_i32(dest, t0, tmp);
439 tcg_temp_free_i32(tmp);
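/*
 * Worked example: gen_add16 on t0 = 0x0000ffff, t1 = 0x00000001 produces
 * 0x00000000, whereas a plain 32-bit add would give 0x00010000 by letting
 * the low-halfword carry leak into the high halfword; masking out bit 15
 * and re-inserting it with XOR is what blocks that carry propagation.
 */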
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
486 } else {
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
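/*
 * Note on the add2 path above: the carry accumulates across the two steps,
 * first NF:CF = t0 + CF, then NF:CF = NF + t1 plus the pending carry, so
 * cpu_CF ends up holding the carry-out of t0 + t1 + CF. The 64-bit
 * fallback computes the same sum zero-extended and splits bit 32 back out
 * into cpu_CF via tcg_gen_extr_i64_i32().
 */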
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
509 TCGv_i32 tmp;
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
543 tcg_temp_free_i32(tmp1); \
545 GEN_SHIFT(shl)
546 GEN_SHIFT(shr)
547 #undef GEN_SHIFT
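/*
 * For illustration, GEN_SHIFT(shl) above expands to a gen_shl() with the
 * ARM register-controlled shift semantics: only the low byte of t1 is
 * consulted, and a shift amount of 32 or more yields 0, i.e. roughly
 *
 *     dest = (t1 & 0xff) > 31 ? 0 : t0 << (t1 & 0x1f);
 */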
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
551 TCGv_i32 tmp1, tmp2;
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
561 static void shifter_out_im(TCGv_i32 var, int shift)
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
570 switch (shiftop) {
571 case 0: /* LSL */
572 if (shift != 0) {
573 if (flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
577 break;
578 case 1: /* LSR */
579 if (shift == 0) {
580 if (flags) {
581 tcg_gen_shri_i32(cpu_CF, var, 31);
583 tcg_gen_movi_i32(var, 0);
584 } else {
585 if (flags)
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
589 break;
590 case 2: /* ASR */
591 if (shift == 0)
592 shift = 32;
593 if (flags)
594 shifter_out_im(var, shift - 1);
595 if (shift == 32)
596 shift = 31;
597 tcg_gen_sari_i32(var, var, shift);
598 break;
599 case 3: /* ROR/RRX */
600 if (shift != 0) {
601 if (flags)
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
604 } else {
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 if (flags)
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
619 if (flags) {
620 switch (shiftop) {
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
626 } else {
627 switch (shiftop) {
628 case 0:
629 gen_shl(var, var, shift);
630 break;
631 case 1:
632 gen_shr(var, var, shift);
633 break;
634 case 2:
635 gen_sar(var, var, shift);
636 break;
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
641 tcg_temp_free_i32(shift);
645 * Generate a conditional based on ARM condition code cc.
646 * This is common between ARM and Aarch64 targets.
648 void arm_test_cc(DisasCompare *cmp, int cc)
650 TCGv_i32 value;
651 TCGCond cond;
652 bool global = true;
654 switch (cc) {
655 case 0: /* eq: Z */
656 case 1: /* ne: !Z */
657 cond = TCG_COND_EQ;
658 value = cpu_ZF;
659 break;
661 case 2: /* cs: C */
662 case 3: /* cc: !C */
663 cond = TCG_COND_NE;
664 value = cpu_CF;
665 break;
667 case 4: /* mi: N */
668 case 5: /* pl: !N */
669 cond = TCG_COND_LT;
670 value = cpu_NF;
671 break;
673 case 6: /* vs: V */
674 case 7: /* vc: !V */
675 cond = TCG_COND_LT;
676 value = cpu_VF;
677 break;
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
681 cond = TCG_COND_NE;
682 value = tcg_temp_new_i32();
683 global = false;
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
688 break;
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 cond = TCG_COND_GE;
694 value = tcg_temp_new_i32();
695 global = false;
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
697 break;
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
701 cond = TCG_COND_NE;
702 value = tcg_temp_new_i32();
703 global = false;
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
709 break;
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
716 value = cpu_ZF;
717 goto no_invert;
719 default:
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
721 abort();
724 if (cc & 1) {
725 cond = tcg_invert_cond(cond);
728 no_invert:
729 cmp->cond = cond;
730 cmp->value = value;
731 cmp->value_global = global;
734 void arm_free_cc(DisasCompare *cmp)
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
746 void arm_gen_test_cc(int cc, TCGLabel *label)
748 DisasCompare cmp;
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
751 arm_free_cc(&cmp);
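/*
 * Usage sketch: a caller that wants to skip generated code when condition
 * "cc" does not hold would typically branch on the inverted condition:
 *
 *     TCGLabel *skip = gen_new_label();
 *     arm_gen_test_cc(cc ^ 1, skip);    // taken when cc is false
 *     ... emit the conditional body ...
 *     gen_set_label(skip);
 */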
754 static inline void gen_set_condexec(DisasContext *s)
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
766 tcg_gen_movi_i32(cpu_R[15], val);
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
802 /* Generate the same code here as for a simple bx, but flag via
803 * s->base.is_jmp that we need to do the rest of the work later.
805 gen_bx(s, var);
806 #ifndef CONFIG_USER_ONLY
807 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
808 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
809 s->base.is_jmp = DISAS_BX_EXCRET;
811 #endif
814 static inline void gen_bx_excret_final_code(DisasContext *s)
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
818 uint32_t min_magic;
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
823 } else {
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
833 } else {
834 tcg_gen_exit_tb(NULL, 0);
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
848 gen_ss_advance(s);
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
852 static inline void gen_bxns(DisasContext *s, int rm)
854 TCGv_i32 var = load_reg(s, rm);
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
871 static inline void gen_blxns(DisasContext *s, int rm)
873 TCGv_i32 var = load_reg(s, rm);
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
890 if (reg == 15 && ENABLE_ARCH_7) {
891 gen_bx(s, var);
892 } else {
893 store_reg(s, reg, var);
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
905 } else {
906 store_reg(s, reg, var);
910 #ifdef CONFIG_USER_ONLY
911 #define IS_USER_ONLY 1
912 #else
913 #define IS_USER_ONLY 0
914 #endif
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
929 /* Not needed for user-mode BE32, where we use MO_BE instead. */
930 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
933 return addr;
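/*
 * For illustration: the XOR above implements the BE32 (SCTLR.B) address
 * munging for sub-word accesses. With sctlr_b set, a byte access (MO_8)
 * to address 0x1001 becomes an access to 0x1001 ^ 3 = 0x1002, so that
 * byte lanes line up with a big-endian word layout.
 */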
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
939 TCGv addr;
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
943 opc |= MO_ALIGN;
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
948 tcg_temp_free(addr);
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
954 TCGv addr;
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
958 opc |= MO_ALIGN;
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
963 tcg_temp_free(addr);
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
994 tcg_temp_free(addr);
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1014 } else {
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1017 tcg_temp_free(addr);
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
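/*
 * For illustration, DO_GEN_LD(8u, MO_UB) above expands to
 *
 *     static inline void gen_aa32_ld8u(DisasContext *s, TCGv_i32 val,
 *                                      TCGv_i32 a32, int index)
 *     {
 *         gen_aa32_ld_i32(s, val, a32, index, MO_UB | s->be_data);
 *     }
 *
 * i.e. a zero-extending byte load that picks up the CPU's data endianness
 * from s->be_data; the other DO_GEN_LD/DO_GEN_ST lines follow the same
 * pattern for the remaining widths.
 */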
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (ie before
1037 * the insn really executes).
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1046 s->svc_imm = imm16;
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1051 static inline void gen_smc(DisasContext *s)
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1056 TCGv_i32 tmp;
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1085 TCGv_i32 tcg_syn;
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1095 static void unallocated_encoding(DisasContext *s)
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1109 static inline void gen_hlt(DisasContext *s, int imm)
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113 * Since QEMU doesn't implement external debug, we treat this as
1114 * the architecture requires when halting debug is disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
1123 if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125 s->current_el != 0 &&
1126 #endif
1127 (imm == (s->thumb ? 0x3c : 0xf000))) {
1128 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129 return;
1132 unallocated_encoding(s);
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1137 TCGv_ptr statusptr = tcg_temp_new_ptr();
1138 int offset;
1139 if (neon) {
1140 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141 } else {
1142 offset = offsetof(CPUARMState, vfp.fp_status);
1144 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145 return statusptr;
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1150 if (dp) {
1151 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152 } else {
1153 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154 if (reg & 1) {
1155 ofs += offsetof(CPU_DoubleU, l.upper);
1156 } else {
1157 ofs += offsetof(CPU_DoubleU, l.lower);
1159 return ofs;
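/*
 * Worked example: for the single-precision register s5 (dp == false,
 * reg == 5) this resolves to zregs[1].d[0] plus the offset of l.upper,
 * i.e. the architecturally upper word of d2, which is where s5 lives in
 * the shared VFP/Neon/SVE register file.
 */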
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164 zero is the least significant end of the register. */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1168 int sreg;
1169 sreg = reg * 2 + n;
1170 return vfp_reg_offset(0, sreg);
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1188 #endif
1189 return neon_reg_offset(reg, 0) + ofs;
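/*
 * Worked example for the big-endian host case: with MO_16 elements
 * (element_size == 2), element 0 gets ofs 0 ^ 6 = 6 and element 3 gets
 * 6 ^ 6 = 0; the XOR reverses the element order within each 8-byte unit
 * so that architectural element numbers map onto the correct bytes of the
 * host-order 64-bit unit.
 */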
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196 return tmp;
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1203 switch (mop) {
1204 case MO_UB:
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1206 break;
1207 case MO_UW:
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1209 break;
1210 case MO_UL:
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1212 break;
1213 default:
1214 g_assert_not_reached();
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1222 switch (mop) {
1223 case MO_UB:
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1225 break;
1226 case MO_UW:
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1228 break;
1229 case MO_UL:
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1231 break;
1232 case MO_Q:
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1234 break;
1235 default:
1236 g_assert_not_reached();
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1248 long offset = neon_element_offset(reg, ele, size);
1250 switch (size) {
1251 case MO_8:
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1253 break;
1254 case MO_16:
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1256 break;
1257 case MO_32:
1258 tcg_gen_st_i32(var, cpu_env, offset);
1259 break;
1260 default:
1261 g_assert_not_reached();
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1267 long offset = neon_element_offset(reg, ele, size);
1269 switch (size) {
1270 case MO_8:
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1272 break;
1273 case MO_16:
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1275 break;
1276 case MO_32:
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1278 break;
1279 case MO_64:
1280 tcg_gen_st_i64(var, cpu_env, offset);
1281 break;
1282 default:
1283 g_assert_not_reached();
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311 return ret;
1314 #define ARM_CP_RW_BIT (1 << 20)
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334 return var;
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1345 iwmmxt_store_reg(cpu_M0, rn);
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1350 iwmmxt_load_reg(cpu_M0, rn);
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
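/*
 * For illustration, each IWMMXT_OP(...) line above expands to a small
 * wrapper; IWMMXT_OP(maddsq), for example, becomes
 *
 *     static inline void gen_op_iwmmxt_maddsq_M0_wRn(int rn)
 *     {
 *         iwmmxt_load_reg(cpu_V1, rn);
 *         gen_helper_iwmmxt_maddsq(cpu_M0, cpu_M0, cpu_V1);
 *     }
 *
 * while the _ENV variants additionally pass cpu_env to the helper and the
 * _SIZE variants instantiate the b/w/l forms in one go.
 */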
1451 static void gen_op_iwmmxt_set_mup(void)
1453 TCGv_i32 tmp;
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1459 static void gen_op_iwmmxt_set_cup(void)
1461 TCGv_i32 tmp;
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482 TCGv_i32 dest)
1484 int rd;
1485 uint32_t offset;
1486 TCGv_i32 tmp;
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1493 /* Pre indexed */
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1496 else
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1501 else
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1504 /* Post indexed */
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1508 else
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1512 return 1;
1513 return 0;
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1518 int rd = (insn >> 0) & 0xf;
1519 TCGv_i32 tmp;
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523 return 1;
1524 } else {
1525 tmp = iwmmxt_load_creg(rd);
1527 } else {
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1535 return 0;
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (ie. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1542 int rd, wrd;
1543 int rdhi, rdlo, rd0, rd1, i;
1544 TCGv_i32 addr;
1545 TCGv_i32 tmp, tmp2, tmp3;
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549 wrd = insn & 0xf;
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1561 return 0;
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1568 return 1;
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1575 } else {
1576 i = 1;
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580 i = 0;
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1585 } else {
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1593 if (i) {
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1599 } else {
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603 } else {
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1613 } else {
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1623 tcg_temp_free_i32(tmp);
1625 tcg_temp_free_i32(addr);
1626 return 0;
1629 if ((insn & 0x0f000000) != 0x0e000000)
1630 return 1;
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1643 break;
1644 case 0x011: /* TMCR */
1645 if (insn & 0xf)
1646 return 1;
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1649 switch (wrd) {
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1652 break;
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1655 /* Fall through. */
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1662 break;
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1670 break;
1671 default:
1672 return 1;
1674 break;
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1685 break;
1686 case 0x111: /* TMRC */
1687 if (insn & 0xf)
1688 return 1;
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1693 break;
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1716 break;
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724 else
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 break;
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1735 case 0:
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737 break;
1738 case 1:
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740 break;
1741 case 2:
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743 break;
1744 case 3:
1745 return 1;
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1750 break;
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1757 case 0:
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759 break;
1760 case 1:
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762 break;
1763 case 2:
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765 break;
1766 case 3:
1767 return 1;
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1772 break;
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780 else
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1786 break;
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795 else
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797 } else {
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800 else
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 break;
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813 else
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1821 break;
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1828 case 0:
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830 break;
1831 case 1:
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833 break;
1834 case 2:
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836 break;
1837 case 3:
1838 return 1;
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1843 break;
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852 else
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854 } else {
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857 else
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1863 break;
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1876 break;
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1879 return 1;
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1885 case 0:
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1888 break;
1889 case 1:
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1892 break;
1893 case 2:
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1896 break;
1897 default:
1898 tmp2 = NULL;
1899 tmp3 = NULL;
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1907 break;
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1912 return 1;
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1916 case 0:
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919 if (insn & 8) {
1920 tcg_gen_ext8s_i32(tmp, tmp);
1921 } else {
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1924 break;
1925 case 1:
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928 if (insn & 8) {
1929 tcg_gen_ext16s_i32(tmp, tmp);
1930 } else {
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1933 break;
1934 case 2:
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937 break;
1939 store_reg(s, rd, tmp);
1940 break;
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943 return 1;
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948 break;
1949 case 1:
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951 break;
1952 case 2:
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954 break;
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1957 gen_set_nzcv(tmp);
1958 tcg_temp_free_i32(tmp);
1959 break;
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1962 return 1;
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1967 case 0:
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969 break;
1970 case 1:
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972 break;
1973 case 2:
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975 break;
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1980 break;
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983 return 1;
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1988 case 0:
1989 for (i = 0; i < 7; i ++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1993 break;
1994 case 1:
1995 for (i = 0; i < 3; i ++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
1999 break;
2000 case 2:
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2003 break;
2005 gen_set_nzcv(tmp);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2008 break;
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2014 case 0:
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016 break;
2017 case 1:
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019 break;
2020 case 2:
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022 break;
2023 case 3:
2024 return 1;
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 break;
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031 return 1;
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2036 case 0:
2037 for (i = 0; i < 7; i ++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2041 break;
2042 case 1:
2043 for (i = 0; i < 3; i ++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2047 break;
2048 case 2:
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2051 break;
2053 gen_set_nzcv(tmp);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2056 break;
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061 return 1;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2065 case 0:
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067 break;
2068 case 1:
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070 break;
2071 case 2:
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073 break;
2075 store_reg(s, rd, tmp);
2076 break;
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2084 case 0:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087 else
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089 break;
2090 case 1:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093 else
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095 break;
2096 case 2:
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099 else
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101 break;
2102 case 3:
2103 return 1;
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2108 break;
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2115 case 0:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2118 else
2119 gen_op_iwmmxt_unpacklub_M0();
2120 break;
2121 case 1:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2124 else
2125 gen_op_iwmmxt_unpackluw_M0();
2126 break;
2127 case 2:
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2130 else
2131 gen_op_iwmmxt_unpacklul_M0();
2132 break;
2133 case 3:
2134 return 1;
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2139 break;
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2146 case 0:
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2149 else
2150 gen_op_iwmmxt_unpackhub_M0();
2151 break;
2152 case 1:
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2155 else
2156 gen_op_iwmmxt_unpackhuw_M0();
2157 break;
2158 case 2:
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2161 else
2162 gen_op_iwmmxt_unpackhul_M0();
2163 break;
2164 case 3:
2165 return 1;
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2170 break;
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2174 return 1;
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2181 return 1;
2183 switch ((insn >> 22) & 3) {
2184 case 1:
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186 break;
2187 case 2:
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189 break;
2190 case 3:
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192 break;
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2198 break;
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2202 return 1;
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2209 return 1;
2211 switch ((insn >> 22) & 3) {
2212 case 1:
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214 break;
2215 case 2:
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217 break;
2218 case 3:
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220 break;
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2226 break;
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2230 return 1;
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2239 switch ((insn >> 22) & 3) {
2240 case 1:
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242 break;
2243 case 2:
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245 break;
2246 case 3:
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248 break;
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2254 break;
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2258 return 1;
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2264 case 1:
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2267 return 1;
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270 break;
2271 case 2:
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2274 return 1;
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277 break;
2278 case 3:
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2281 return 1;
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284 break;
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2290 break;
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2298 case 0:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303 break;
2304 case 1:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309 break;
2310 case 2:
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313 else
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315 break;
2316 case 3:
2317 return 1;
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2321 break;
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2329 case 0:
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332 else
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334 break;
2335 case 1:
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338 else
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340 break;
2341 case 2:
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344 else
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346 break;
2347 case 3:
2348 return 1;
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2352 break;
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2365 break;
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2375 case 0x0:
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377 break;
2378 case 0x1:
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2380 break;
2381 case 0x3:
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383 break;
2384 case 0x4:
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386 break;
2387 case 0x5:
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389 break;
2390 case 0x7:
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392 break;
2393 case 0x8:
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395 break;
2396 case 0x9:
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2398 break;
2399 case 0xb:
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401 break;
2402 default:
2403 return 1;
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2408 break;
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
2422 break;
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2432 case 0x0:
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434 break;
2435 case 0x1:
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2437 break;
2438 case 0x3:
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440 break;
2441 case 0x4:
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443 break;
2444 case 0x5:
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446 break;
2447 case 0x7:
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449 break;
2450 case 0x8:
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452 break;
2453 case 0x9:
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2455 break;
2456 case 0xb:
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458 break;
2459 default:
2460 return 1;
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2465 break;
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471 return 1;
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2477 case 1:
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480 else
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482 break;
2483 case 2:
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486 else
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488 break;
2489 case 3:
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492 else
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494 break;
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2499 break;
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2508 return 1;
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515 break;
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518 break;
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525 break;
2526 default:
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2529 return 1;
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2535 break;
2536 default:
2537 return 1;
2540 return 0;
2543 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
 2544    (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2547 int acc, rd0, rd1, rdhi, rdlo;
2548 TCGv_i32 tmp, tmp2;
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2553 rd1 = insn & 0xf;
2554 acc = (insn >> 5) & 7;
2556 if (acc != 0)
2557 return 1;
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2562 case 0x0: /* MIA */
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564 break;
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567 break;
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577 break;
2578 default:
2579 return 1;
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2585 return 0;
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2592 acc = insn & 7;
2594 if (acc != 0)
2595 return 1;
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602 } else { /* MAR */
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
2606 return 0;
2609 return 1;
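/*
 * Decode a VFP/Neon D-register field: the low four bits come from "bigbit"
 * and the fifth bit from "smallbit". When the 32-double-register extension
 * (aa32_simd_r32) is not present, a set "smallbit" makes the containing
 * function return 1 (UNDEF).
 */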
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 return 1; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 }} while (0)
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
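/*
 * Direct block chaining (goto_tb) is only used when the branch target lies
 * on the same guest page as this TB's start or as the last byte of the
 * current instruction; for user-only emulation there is no restriction.
 */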
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
2655 static void gen_goto_ptr(void)
2657 tcg_gen_lookup_and_goto_ptr();
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
 2662  * enter the next TB.
 2663  */
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2674 s->base.is_jmp = DISAS_NORETURN;
2677 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
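/*
 * 16x16->32 signed multiply for the halfword-multiply family: x and y pick
 * the top (1) or bottom (0) halfword of t0 and t1 before multiplying.
 */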
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
 2701 /* Return the mask of PSR bits set by an MSR instruction.  */
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2704 uint32_t mask = 0;
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2731 return mask;
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2751 tcg_temp_free_i32(t0);
2752 gen_lookup_tb(s);
2753 return 0;
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
 2781  * that EL3 is AArch64) must trap to EL3.
 2782  *
 2783  * If the access checks fail this function will emit code to take
 2784  * an exception and return false. Otherwise it will return true,
 2785  * and set *tgtmode and *regno appropriately.
 2786  */
2787 int exc_target = default_exception_el(s);
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
 2790      * Virtualization Extensions.
 2791      */
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
 2802      * of registers into (r, sysm).
 2803      */
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2809 break;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2812 break;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2815 break;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2818 break;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2821 break;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2824 break;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2827 break;
2828 default: /* unallocated */
2829 goto undef;
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2847 break;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2851 break;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2855 break;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2859 break;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2863 break;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2868 break;
2869 default: /* unallocated */
2870 goto undef;
2874 /* Catch the 'accessing inaccessible register' cases we can detect
 2875      * at translate time.
 2876      */
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
 2884              * then accesses to Mon registers trap to EL3
 2885              */
2886 exc_target = 3;
2887 goto undef;
2889 break;
2890 case ARM_CPU_MODE_HYP:
 2891         /*
 2892          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
 2893          * (and so we can forbid accesses from EL2 or below). elr_hyp
 2894          * can be accessed also from Hyp mode, so forbid accesses from
 2895          * EL0 or EL1.
 2896          */
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2901 break;
2902 default:
2903 break;
2906 return true;
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921 return;
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943 return;
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
 2959 /* Store value to PC as for an exception return (i.e. don't
 2960  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
 2961  * will do the masking based on the new value of the Thumb bit.
 2962  */
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
 2975      * be called after storing the new PC.
 2976      */
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2996 switch (size) {
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000 default: abort();
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3006 switch (size) {
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010 default: return;
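/*
 * Emit the cpu_env-taking Neon helper for an integer op, selecting the
 * signed/unsigned 8/16/32-bit variant from ((size << 1) | u); any other
 * encoding makes the caller return 1 (UNDEF).
 */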
3014 #define GEN_NEON_INTEGER_OP_ENV(name) do { \
3015 switch ((size << 1) | u) { \
3016 case 0: \
3017 gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3018 break; \
3019 case 1: \
3020 gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3021 break; \
3022 case 2: \
3023 gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3024 break; \
3025 case 3: \
3026 gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3027 break; \
3028 case 4: \
3029 gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3030 break; \
3031 case 5: \
3032 gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3033 break; \
3034 default: return 1; \
3035 }} while (0)
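/*
 * The two helpers below spill/reload a 32-bit value via the vfp.scratch[]
 * array in CPUARMState; neon_store_scratch also frees its source temp.
 */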
3037 static TCGv_i32 neon_load_scratch(int scratch)
3039 TCGv_i32 tmp = tcg_temp_new_i32();
3040 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3041 return tmp;
3044 static void neon_store_scratch(int scratch, TCGv_i32 var)
3046 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3047 tcg_temp_free_i32(var);
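/*
 * Load the scalar operand for a by-element operation: 32-bit scalars are
 * returned as-is, while for 16-bit scalars the selected halfword is
 * duplicated into both halves of the returned value.
 */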
3050 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3052 TCGv_i32 tmp;
3053 if (size == 1) {
3054 tmp = neon_load_reg(reg & 7, reg >> 4);
3055 if (reg & 8) {
3056 gen_neon_dup_high16(tmp);
3057 } else {
3058 gen_neon_dup_low16(tmp);
3060 } else {
3061 tmp = neon_load_reg(reg & 15, reg >> 4);
3063 return tmp;
3066 static int gen_neon_unzip(int rd, int rm, int size, int q)
3068 TCGv_ptr pd, pm;
3070 if (!q && size == 2) {
3071 return 1;
3073 pd = vfp_reg_ptr(true, rd);
3074 pm = vfp_reg_ptr(true, rm);
3075 if (q) {
3076 switch (size) {
3077 case 0:
3078 gen_helper_neon_qunzip8(pd, pm);
3079 break;
3080 case 1:
3081 gen_helper_neon_qunzip16(pd, pm);
3082 break;
3083 case 2:
3084 gen_helper_neon_qunzip32(pd, pm);
3085 break;
3086 default:
3087 abort();
3089 } else {
3090 switch (size) {
3091 case 0:
3092 gen_helper_neon_unzip8(pd, pm);
3093 break;
3094 case 1:
3095 gen_helper_neon_unzip16(pd, pm);
3096 break;
3097 default:
3098 abort();
3101 tcg_temp_free_ptr(pd);
3102 tcg_temp_free_ptr(pm);
3103 return 0;
3106 static int gen_neon_zip(int rd, int rm, int size, int q)
3108 TCGv_ptr pd, pm;
3110 if (!q && size == 2) {
3111 return 1;
3113 pd = vfp_reg_ptr(true, rd);
3114 pm = vfp_reg_ptr(true, rm);
3115 if (q) {
3116 switch (size) {
3117 case 0:
3118 gen_helper_neon_qzip8(pd, pm);
3119 break;
3120 case 1:
3121 gen_helper_neon_qzip16(pd, pm);
3122 break;
3123 case 2:
3124 gen_helper_neon_qzip32(pd, pm);
3125 break;
3126 default:
3127 abort();
3129 } else {
3130 switch (size) {
3131 case 0:
3132 gen_helper_neon_zip8(pd, pm);
3133 break;
3134 case 1:
3135 gen_helper_neon_zip16(pd, pm);
3136 break;
3137 default:
3138 abort();
3141 tcg_temp_free_ptr(pd);
3142 tcg_temp_free_ptr(pm);
3143 return 0;
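/* Lane-transposition helpers used for VTRN on 8-bit and 16-bit elements. */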
3146 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3148 TCGv_i32 rd, tmp;
3150 rd = tcg_temp_new_i32();
3151 tmp = tcg_temp_new_i32();
3153 tcg_gen_shli_i32(rd, t0, 8);
3154 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3155 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3156 tcg_gen_or_i32(rd, rd, tmp);
3158 tcg_gen_shri_i32(t1, t1, 8);
3159 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3160 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3161 tcg_gen_or_i32(t1, t1, tmp);
3162 tcg_gen_mov_i32(t0, rd);
3164 tcg_temp_free_i32(tmp);
3165 tcg_temp_free_i32(rd);
3168 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3170 TCGv_i32 rd, tmp;
3172 rd = tcg_temp_new_i32();
3173 tmp = tcg_temp_new_i32();
3175 tcg_gen_shli_i32(rd, t0, 16);
3176 tcg_gen_andi_i32(tmp, t1, 0xffff);
3177 tcg_gen_or_i32(rd, rd, tmp);
3178 tcg_gen_shri_i32(t1, t1, 16);
3179 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3180 tcg_gen_or_i32(t1, t1, tmp);
3181 tcg_gen_mov_i32(t0, rd);
3183 tcg_temp_free_i32(tmp);
3184 tcg_temp_free_i32(rd);
3187 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3189 switch (size) {
3190 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3191 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3192 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3193 default: abort();
3197 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3199 switch (size) {
3200 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3201 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3202 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3203 default: abort();
3207 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3209 switch (size) {
3210 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3211 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3212 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3213 default: abort();
3217 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3219 switch (size) {
3220 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3221 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3222 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3223 default: abort();
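/*
 * Per-element shift used by the narrowing shift operations: q selects the
 * rounding variant and u the unsigned variant of the 16/32-bit shift.
 */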
3227 static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3228 int q, int u)
3230 if (q) {
3231 if (u) {
3232 switch (size) {
3233 case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3234 case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3235 default: abort();
3237 } else {
3238 switch (size) {
3239 case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3240 case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3241 default: abort();
3244 } else {
3245 if (u) {
3246 switch (size) {
3247 case 1: gen_helper_neon_shl_u16(var, var, shift); break;
3248 case 2: gen_ushl_i32(var, var, shift); break;
3249 default: abort();
3251 } else {
3252 switch (size) {
3253 case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3254 case 2: gen_sshl_i32(var, var, shift); break;
3255 default: abort();
3261 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3263 if (u) {
3264 switch (size) {
3265 case 0: gen_helper_neon_widen_u8(dest, src); break;
3266 case 1: gen_helper_neon_widen_u16(dest, src); break;
3267 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3268 default: abort();
3270 } else {
3271 switch (size) {
3272 case 0: gen_helper_neon_widen_s8(dest, src); break;
3273 case 1: gen_helper_neon_widen_s16(dest, src); break;
3274 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3275 default: abort();
3278 tcg_temp_free_i32(src);
3281 static inline void gen_neon_addl(int size)
3283 switch (size) {
3284 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3285 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3286 case 2: tcg_gen_add_i64(CPU_V001); break;
3287 default: abort();
3291 static inline void gen_neon_subl(int size)
3293 switch (size) {
3294 case 0: gen_helper_neon_subl_u16(CPU_V001); break;
3295 case 1: gen_helper_neon_subl_u32(CPU_V001); break;
3296 case 2: tcg_gen_sub_i64(CPU_V001); break;
3297 default: abort();
3301 static inline void gen_neon_negl(TCGv_i64 var, int size)
3303 switch (size) {
3304 case 0: gen_helper_neon_negl_u16(var, var); break;
3305 case 1: gen_helper_neon_negl_u32(var, var); break;
3306 case 2:
3307 tcg_gen_neg_i64(var, var);
3308 break;
3309 default: abort();
3313 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3315 switch (size) {
3316 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3317 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3318 default: abort();
3322 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3323 int size, int u)
3325 TCGv_i64 tmp;
3327 switch ((size << 1) | u) {
3328 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3329 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3330 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3331 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3332 case 4:
3333 tmp = gen_muls_i64_i32(a, b);
3334 tcg_gen_mov_i64(dest, tmp);
3335 tcg_temp_free_i64(tmp);
3336 break;
3337 case 5:
3338 tmp = gen_mulu_i64_i32(a, b);
3339 tcg_gen_mov_i64(dest, tmp);
3340 tcg_temp_free_i64(tmp);
3341 break;
3342 default: abort();
3345 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3346 Don't forget to clean them now. */
3347 if (size < 2) {
3348 tcg_temp_free_i32(a);
3349 tcg_temp_free_i32(b);
3353 static void gen_neon_narrow_op(int op, int u, int size,
3354 TCGv_i32 dest, TCGv_i64 src)
3356 if (op) {
3357 if (u) {
3358 gen_neon_unarrow_sats(size, dest, src);
3359 } else {
3360 gen_neon_narrow(size, dest, src);
3362 } else {
3363 if (u) {
3364 gen_neon_narrow_satu(size, dest, src);
3365 } else {
3366 gen_neon_narrow_sats(size, dest, src);
3371 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3372 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
 3373  * table A7-13.
 3374  */
3375 #define NEON_2RM_VREV64 0
3376 #define NEON_2RM_VREV32 1
3377 #define NEON_2RM_VREV16 2
3378 #define NEON_2RM_VPADDL 4
3379 #define NEON_2RM_VPADDL_U 5
3380 #define NEON_2RM_AESE 6 /* Includes AESD */
3381 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3382 #define NEON_2RM_VCLS 8
3383 #define NEON_2RM_VCLZ 9
3384 #define NEON_2RM_VCNT 10
3385 #define NEON_2RM_VMVN 11
3386 #define NEON_2RM_VPADAL 12
3387 #define NEON_2RM_VPADAL_U 13
3388 #define NEON_2RM_VQABS 14
3389 #define NEON_2RM_VQNEG 15
3390 #define NEON_2RM_VCGT0 16
3391 #define NEON_2RM_VCGE0 17
3392 #define NEON_2RM_VCEQ0 18
3393 #define NEON_2RM_VCLE0 19
3394 #define NEON_2RM_VCLT0 20
3395 #define NEON_2RM_SHA1H 21
3396 #define NEON_2RM_VABS 22
3397 #define NEON_2RM_VNEG 23
3398 #define NEON_2RM_VCGT0_F 24
3399 #define NEON_2RM_VCGE0_F 25
3400 #define NEON_2RM_VCEQ0_F 26
3401 #define NEON_2RM_VCLE0_F 27
3402 #define NEON_2RM_VCLT0_F 28
3403 #define NEON_2RM_VABS_F 30
3404 #define NEON_2RM_VNEG_F 31
3405 #define NEON_2RM_VSWP 32
3406 #define NEON_2RM_VTRN 33
3407 #define NEON_2RM_VUZP 34
3408 #define NEON_2RM_VZIP 35
3409 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3410 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3411 #define NEON_2RM_VSHLL 38
3412 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3413 #define NEON_2RM_VRINTN 40
3414 #define NEON_2RM_VRINTX 41
3415 #define NEON_2RM_VRINTA 42
3416 #define NEON_2RM_VRINTZ 43
3417 #define NEON_2RM_VCVT_F16_F32 44
3418 #define NEON_2RM_VRINTM 45
3419 #define NEON_2RM_VCVT_F32_F16 46
3420 #define NEON_2RM_VRINTP 47
3421 #define NEON_2RM_VCVTAU 48
3422 #define NEON_2RM_VCVTAS 49
3423 #define NEON_2RM_VCVTNU 50
3424 #define NEON_2RM_VCVTNS 51
3425 #define NEON_2RM_VCVTPU 52
3426 #define NEON_2RM_VCVTPS 53
3427 #define NEON_2RM_VCVTMU 54
3428 #define NEON_2RM_VCVTMS 55
3429 #define NEON_2RM_VRECPE 56
3430 #define NEON_2RM_VRSQRTE 57
3431 #define NEON_2RM_VRECPE_F 58
3432 #define NEON_2RM_VRSQRTE_F 59
3433 #define NEON_2RM_VCVT_FS 60
3434 #define NEON_2RM_VCVT_FU 61
3435 #define NEON_2RM_VCVT_SF 62
3436 #define NEON_2RM_VCVT_UF 63
3438 static bool neon_2rm_is_v8_op(int op)
3440 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3441 switch (op) {
3442 case NEON_2RM_VRINTN:
3443 case NEON_2RM_VRINTA:
3444 case NEON_2RM_VRINTM:
3445 case NEON_2RM_VRINTP:
3446 case NEON_2RM_VRINTZ:
3447 case NEON_2RM_VRINTX:
3448 case NEON_2RM_VCVTAU:
3449 case NEON_2RM_VCVTAS:
3450 case NEON_2RM_VCVTNU:
3451 case NEON_2RM_VCVTNS:
3452 case NEON_2RM_VCVTPU:
3453 case NEON_2RM_VCVTPS:
3454 case NEON_2RM_VCVTMU:
3455 case NEON_2RM_VCVTMS:
3456 return true;
3457 default:
3458 return false;
3462 /* Each entry in this array has bit n set if the insn allows
3463 * size value n (otherwise it will UNDEF). Since unallocated
 3464  * op values will have no bits set they always UNDEF.
 3465  */
3466 static const uint8_t neon_2rm_sizes[] = {
3467 [NEON_2RM_VREV64] = 0x7,
3468 [NEON_2RM_VREV32] = 0x3,
3469 [NEON_2RM_VREV16] = 0x1,
3470 [NEON_2RM_VPADDL] = 0x7,
3471 [NEON_2RM_VPADDL_U] = 0x7,
3472 [NEON_2RM_AESE] = 0x1,
3473 [NEON_2RM_AESMC] = 0x1,
3474 [NEON_2RM_VCLS] = 0x7,
3475 [NEON_2RM_VCLZ] = 0x7,
3476 [NEON_2RM_VCNT] = 0x1,
3477 [NEON_2RM_VMVN] = 0x1,
3478 [NEON_2RM_VPADAL] = 0x7,
3479 [NEON_2RM_VPADAL_U] = 0x7,
3480 [NEON_2RM_VQABS] = 0x7,
3481 [NEON_2RM_VQNEG] = 0x7,
3482 [NEON_2RM_VCGT0] = 0x7,
3483 [NEON_2RM_VCGE0] = 0x7,
3484 [NEON_2RM_VCEQ0] = 0x7,
3485 [NEON_2RM_VCLE0] = 0x7,
3486 [NEON_2RM_VCLT0] = 0x7,
3487 [NEON_2RM_SHA1H] = 0x4,
3488 [NEON_2RM_VABS] = 0x7,
3489 [NEON_2RM_VNEG] = 0x7,
3490 [NEON_2RM_VCGT0_F] = 0x4,
3491 [NEON_2RM_VCGE0_F] = 0x4,
3492 [NEON_2RM_VCEQ0_F] = 0x4,
3493 [NEON_2RM_VCLE0_F] = 0x4,
3494 [NEON_2RM_VCLT0_F] = 0x4,
3495 [NEON_2RM_VABS_F] = 0x4,
3496 [NEON_2RM_VNEG_F] = 0x4,
3497 [NEON_2RM_VSWP] = 0x1,
3498 [NEON_2RM_VTRN] = 0x7,
3499 [NEON_2RM_VUZP] = 0x7,
3500 [NEON_2RM_VZIP] = 0x7,
3501 [NEON_2RM_VMOVN] = 0x7,
3502 [NEON_2RM_VQMOVN] = 0x7,
3503 [NEON_2RM_VSHLL] = 0x7,
3504 [NEON_2RM_SHA1SU1] = 0x4,
3505 [NEON_2RM_VRINTN] = 0x4,
3506 [NEON_2RM_VRINTX] = 0x4,
3507 [NEON_2RM_VRINTA] = 0x4,
3508 [NEON_2RM_VRINTZ] = 0x4,
3509 [NEON_2RM_VCVT_F16_F32] = 0x2,
3510 [NEON_2RM_VRINTM] = 0x4,
3511 [NEON_2RM_VCVT_F32_F16] = 0x2,
3512 [NEON_2RM_VRINTP] = 0x4,
3513 [NEON_2RM_VCVTAU] = 0x4,
3514 [NEON_2RM_VCVTAS] = 0x4,
3515 [NEON_2RM_VCVTNU] = 0x4,
3516 [NEON_2RM_VCVTNS] = 0x4,
3517 [NEON_2RM_VCVTPU] = 0x4,
3518 [NEON_2RM_VCVTPS] = 0x4,
3519 [NEON_2RM_VCVTMU] = 0x4,
3520 [NEON_2RM_VCVTMS] = 0x4,
3521 [NEON_2RM_VRECPE] = 0x4,
3522 [NEON_2RM_VRSQRTE] = 0x4,
3523 [NEON_2RM_VRECPE_F] = 0x4,
3524 [NEON_2RM_VRSQRTE_F] = 0x4,
3525 [NEON_2RM_VCVT_FS] = 0x4,
3526 [NEON_2RM_VCVT_FU] = 0x4,
3527 [NEON_2RM_VCVT_SF] = 0x4,
3528 [NEON_2RM_VCVT_UF] = 0x4,
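/*
 * Expand a three-operand gvec helper that also needs to update the
 * saturation flag: a pointer to vfp.qc is passed as the helper's
 * pointer argument.
 */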
3531 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3532 uint32_t opr_sz, uint32_t max_sz,
3533 gen_helper_gvec_3_ptr *fn)
3535 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3537 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3538 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3539 opr_sz, max_sz, 0, fn);
3540 tcg_temp_free_ptr(qc_ptr);
3543 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3544 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3546 static gen_helper_gvec_3_ptr * const fns[2] = {
3547 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3549 tcg_debug_assert(vece >= 1 && vece <= 2);
3550 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3553 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3554 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3556 static gen_helper_gvec_3_ptr * const fns[2] = {
3557 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3559 tcg_debug_assert(vece >= 1 && vece <= 2);
3560 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
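/*
 * Compare-against-zero expanders: each result element is all ones when the
 * comparison is true and all zeros otherwise, hence the setcond followed by
 * negation on the scalar paths.
 */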
3563 #define GEN_CMP0(NAME, COND) \
3564 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
3566 tcg_gen_setcondi_i32(COND, d, a, 0); \
3567 tcg_gen_neg_i32(d, d); \
3569 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
3571 tcg_gen_setcondi_i64(COND, d, a, 0); \
3572 tcg_gen_neg_i64(d, d); \
3574 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3576 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
3577 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
3578 tcg_temp_free_vec(zero); \
3580 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
3581 uint32_t opr_sz, uint32_t max_sz) \
3583 const GVecGen2 op[4] = { \
3584 { .fno = gen_helper_gvec_##NAME##0_b, \
3585 .fniv = gen_##NAME##0_vec, \
3586 .opt_opc = vecop_list_cmp, \
3587 .vece = MO_8 }, \
3588 { .fno = gen_helper_gvec_##NAME##0_h, \
3589 .fniv = gen_##NAME##0_vec, \
3590 .opt_opc = vecop_list_cmp, \
3591 .vece = MO_16 }, \
3592 { .fni4 = gen_##NAME##0_i32, \
3593 .fniv = gen_##NAME##0_vec, \
3594 .opt_opc = vecop_list_cmp, \
3595 .vece = MO_32 }, \
3596 { .fni8 = gen_##NAME##0_i64, \
3597 .fniv = gen_##NAME##0_vec, \
3598 .opt_opc = vecop_list_cmp, \
3599 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
3600 .vece = MO_64 }, \
3601 }; \
3602 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
3605 static const TCGOpcode vecop_list_cmp[] = {
3606 INDEX_op_cmp_vec, 0
3609 GEN_CMP0(ceq, TCG_COND_EQ)
3610 GEN_CMP0(cle, TCG_COND_LE)
3611 GEN_CMP0(cge, TCG_COND_GE)
3612 GEN_CMP0(clt, TCG_COND_LT)
3613 GEN_CMP0(cgt, TCG_COND_GT)
3615 #undef GEN_CMP0
3617 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3619 tcg_gen_vec_sar8i_i64(a, a, shift);
3620 tcg_gen_vec_add8_i64(d, d, a);
3623 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3625 tcg_gen_vec_sar16i_i64(a, a, shift);
3626 tcg_gen_vec_add16_i64(d, d, a);
3629 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3631 tcg_gen_sari_i32(a, a, shift);
3632 tcg_gen_add_i32(d, d, a);
3635 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3637 tcg_gen_sari_i64(a, a, shift);
3638 tcg_gen_add_i64(d, d, a);
3641 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3643 tcg_gen_sari_vec(vece, a, a, sh);
3644 tcg_gen_add_vec(vece, d, d, a);
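/* SSRA: signed shift right by immediate and accumulate, d[i] += a[i] >> sh. */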
3647 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3648 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3650 static const TCGOpcode vecop_list[] = {
3651 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3653 static const GVecGen2i ops[4] = {
3654 { .fni8 = gen_ssra8_i64,
3655 .fniv = gen_ssra_vec,
3656 .fno = gen_helper_gvec_ssra_b,
3657 .load_dest = true,
3658 .opt_opc = vecop_list,
3659 .vece = MO_8 },
3660 { .fni8 = gen_ssra16_i64,
3661 .fniv = gen_ssra_vec,
3662 .fno = gen_helper_gvec_ssra_h,
3663 .load_dest = true,
3664 .opt_opc = vecop_list,
3665 .vece = MO_16 },
3666 { .fni4 = gen_ssra32_i32,
3667 .fniv = gen_ssra_vec,
3668 .fno = gen_helper_gvec_ssra_s,
3669 .load_dest = true,
3670 .opt_opc = vecop_list,
3671 .vece = MO_32 },
3672 { .fni8 = gen_ssra64_i64,
3673 .fniv = gen_ssra_vec,
 3674           .fno = gen_helper_gvec_ssra_d,
3675 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3676 .opt_opc = vecop_list,
3677 .load_dest = true,
3678 .vece = MO_64 },
3681 /* tszimm encoding produces immediates in the range [1..esize]. */
3682 tcg_debug_assert(shift > 0);
3683 tcg_debug_assert(shift <= (8 << vece));
 3685     /*
 3686      * Shifts larger than the element size are architecturally valid.
 3687      * Signed results in all sign bits.
 3688      */
3689 shift = MIN(shift, (8 << vece) - 1);
3690 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3693 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3695 tcg_gen_vec_shr8i_i64(a, a, shift);
3696 tcg_gen_vec_add8_i64(d, d, a);
3699 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3701 tcg_gen_vec_shr16i_i64(a, a, shift);
3702 tcg_gen_vec_add16_i64(d, d, a);
3705 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3707 tcg_gen_shri_i32(a, a, shift);
3708 tcg_gen_add_i32(d, d, a);
3711 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3713 tcg_gen_shri_i64(a, a, shift);
3714 tcg_gen_add_i64(d, d, a);
3717 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3719 tcg_gen_shri_vec(vece, a, a, sh);
3720 tcg_gen_add_vec(vece, d, d, a);
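/* USRA: unsigned (logical) shift right by immediate and accumulate. */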
3723 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3724 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3726 static const TCGOpcode vecop_list[] = {
3727 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3729 static const GVecGen2i ops[4] = {
3730 { .fni8 = gen_usra8_i64,
3731 .fniv = gen_usra_vec,
3732 .fno = gen_helper_gvec_usra_b,
3733 .load_dest = true,
3734 .opt_opc = vecop_list,
3735 .vece = MO_8, },
3736 { .fni8 = gen_usra16_i64,
3737 .fniv = gen_usra_vec,
3738 .fno = gen_helper_gvec_usra_h,
3739 .load_dest = true,
3740 .opt_opc = vecop_list,
3741 .vece = MO_16, },
3742 { .fni4 = gen_usra32_i32,
3743 .fniv = gen_usra_vec,
3744 .fno = gen_helper_gvec_usra_s,
3745 .load_dest = true,
3746 .opt_opc = vecop_list,
3747 .vece = MO_32, },
3748 { .fni8 = gen_usra64_i64,
3749 .fniv = gen_usra_vec,
3750 .fno = gen_helper_gvec_usra_d,
3751 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3752 .load_dest = true,
3753 .opt_opc = vecop_list,
3754 .vece = MO_64, },
3757 /* tszimm encoding produces immediates in the range [1..esize]. */
3758 tcg_debug_assert(shift > 0);
3759 tcg_debug_assert(shift <= (8 << vece));
 3761     /*
 3762      * Shifts larger than the element size are architecturally valid.
 3763      * Unsigned results in all zeros as input to accumulate: nop.
 3764      */
3765 if (shift < (8 << vece)) {
3766 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3767 } else {
3768 /* Nop, but we do need to clear the tail. */
3769 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
 3773 /*
 3774  * Shift one less than the requested amount, and the low bit is
 3775  * the rounding bit. For the 8 and 16-bit operations, because we
 3776  * mask the low bit, we can perform a normal integer shift instead
 3777  * of a vector shift.
 3778  */
3779 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3781 TCGv_i64 t = tcg_temp_new_i64();
3783 tcg_gen_shri_i64(t, a, sh - 1);
3784 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3785 tcg_gen_vec_sar8i_i64(d, a, sh);
3786 tcg_gen_vec_add8_i64(d, d, t);
3787 tcg_temp_free_i64(t);
3790 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3792 TCGv_i64 t = tcg_temp_new_i64();
3794 tcg_gen_shri_i64(t, a, sh - 1);
3795 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3796 tcg_gen_vec_sar16i_i64(d, a, sh);
3797 tcg_gen_vec_add16_i64(d, d, t);
3798 tcg_temp_free_i64(t);
3801 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3803 TCGv_i32 t = tcg_temp_new_i32();
3805 tcg_gen_extract_i32(t, a, sh - 1, 1);
3806 tcg_gen_sari_i32(d, a, sh);
3807 tcg_gen_add_i32(d, d, t);
3808 tcg_temp_free_i32(t);
3811 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3813 TCGv_i64 t = tcg_temp_new_i64();
3815 tcg_gen_extract_i64(t, a, sh - 1, 1);
3816 tcg_gen_sari_i64(d, a, sh);
3817 tcg_gen_add_i64(d, d, t);
3818 tcg_temp_free_i64(t);
3821 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3823 TCGv_vec t = tcg_temp_new_vec_matching(d);
3824 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3826 tcg_gen_shri_vec(vece, t, a, sh - 1);
3827 tcg_gen_dupi_vec(vece, ones, 1);
3828 tcg_gen_and_vec(vece, t, t, ones);
3829 tcg_gen_sari_vec(vece, d, a, sh);
3830 tcg_gen_add_vec(vece, d, d, t);
3832 tcg_temp_free_vec(t);
3833 tcg_temp_free_vec(ones);
3836 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3837 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3839 static const TCGOpcode vecop_list[] = {
3840 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3842 static const GVecGen2i ops[4] = {
3843 { .fni8 = gen_srshr8_i64,
3844 .fniv = gen_srshr_vec,
3845 .fno = gen_helper_gvec_srshr_b,
3846 .opt_opc = vecop_list,
3847 .vece = MO_8 },
3848 { .fni8 = gen_srshr16_i64,
3849 .fniv = gen_srshr_vec,
3850 .fno = gen_helper_gvec_srshr_h,
3851 .opt_opc = vecop_list,
3852 .vece = MO_16 },
3853 { .fni4 = gen_srshr32_i32,
3854 .fniv = gen_srshr_vec,
3855 .fno = gen_helper_gvec_srshr_s,
3856 .opt_opc = vecop_list,
3857 .vece = MO_32 },
3858 { .fni8 = gen_srshr64_i64,
3859 .fniv = gen_srshr_vec,
3860 .fno = gen_helper_gvec_srshr_d,
3861 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3862 .opt_opc = vecop_list,
3863 .vece = MO_64 },
3866 /* tszimm encoding produces immediates in the range [1..esize] */
3867 tcg_debug_assert(shift > 0);
3868 tcg_debug_assert(shift <= (8 << vece));
3870 if (shift == (8 << vece)) {
 3871         /*
 3872          * Shifts larger than the element size are architecturally valid.
 3873          * Signed results in all sign bits. With rounding, this produces
 3874          * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
 3875          * I.e. always zero.
 3876          */
3877 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3878 } else {
3879 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
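/*
 * SRSRA: rounding signed shift right and accumulate, built from the
 * gen_srshr* helpers above plus an add into the destination.
 */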
3883 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3885 TCGv_i64 t = tcg_temp_new_i64();
3887 gen_srshr8_i64(t, a, sh);
3888 tcg_gen_vec_add8_i64(d, d, t);
3889 tcg_temp_free_i64(t);
3892 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3894 TCGv_i64 t = tcg_temp_new_i64();
3896 gen_srshr16_i64(t, a, sh);
3897 tcg_gen_vec_add16_i64(d, d, t);
3898 tcg_temp_free_i64(t);
3901 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3903 TCGv_i32 t = tcg_temp_new_i32();
3905 gen_srshr32_i32(t, a, sh);
3906 tcg_gen_add_i32(d, d, t);
3907 tcg_temp_free_i32(t);
3910 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3912 TCGv_i64 t = tcg_temp_new_i64();
3914 gen_srshr64_i64(t, a, sh);
3915 tcg_gen_add_i64(d, d, t);
3916 tcg_temp_free_i64(t);
3919 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3921 TCGv_vec t = tcg_temp_new_vec_matching(d);
3923 gen_srshr_vec(vece, t, a, sh);
3924 tcg_gen_add_vec(vece, d, d, t);
3925 tcg_temp_free_vec(t);
3928 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3929 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3931 static const TCGOpcode vecop_list[] = {
3932 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3934 static const GVecGen2i ops[4] = {
3935 { .fni8 = gen_srsra8_i64,
3936 .fniv = gen_srsra_vec,
3937 .fno = gen_helper_gvec_srsra_b,
3938 .opt_opc = vecop_list,
3939 .load_dest = true,
3940 .vece = MO_8 },
3941 { .fni8 = gen_srsra16_i64,
3942 .fniv = gen_srsra_vec,
3943 .fno = gen_helper_gvec_srsra_h,
3944 .opt_opc = vecop_list,
3945 .load_dest = true,
3946 .vece = MO_16 },
3947 { .fni4 = gen_srsra32_i32,
3948 .fniv = gen_srsra_vec,
3949 .fno = gen_helper_gvec_srsra_s,
3950 .opt_opc = vecop_list,
3951 .load_dest = true,
3952 .vece = MO_32 },
3953 { .fni8 = gen_srsra64_i64,
3954 .fniv = gen_srsra_vec,
3955 .fno = gen_helper_gvec_srsra_d,
3956 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3957 .opt_opc = vecop_list,
3958 .load_dest = true,
3959 .vece = MO_64 },
3962 /* tszimm encoding produces immediates in the range [1..esize] */
3963 tcg_debug_assert(shift > 0);
3964 tcg_debug_assert(shift <= (8 << vece));
 3966     /*
 3967      * Shifts larger than the element size are architecturally valid.
 3968      * Signed results in all sign bits. With rounding, this produces
 3969      * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
 3970      * I.e. always zero. With accumulation, this leaves D unchanged.
 3971      */
3972 if (shift == (8 << vece)) {
3973 /* Nop, but we do need to clear the tail. */
3974 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3975 } else {
3976 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3980 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3982 TCGv_i64 t = tcg_temp_new_i64();
3984 tcg_gen_shri_i64(t, a, sh - 1);
3985 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3986 tcg_gen_vec_shr8i_i64(d, a, sh);
3987 tcg_gen_vec_add8_i64(d, d, t);
3988 tcg_temp_free_i64(t);
3991 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3993 TCGv_i64 t = tcg_temp_new_i64();
3995 tcg_gen_shri_i64(t, a, sh - 1);
3996 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3997 tcg_gen_vec_shr16i_i64(d, a, sh);
3998 tcg_gen_vec_add16_i64(d, d, t);
3999 tcg_temp_free_i64(t);
4002 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4004 TCGv_i32 t = tcg_temp_new_i32();
4006 tcg_gen_extract_i32(t, a, sh - 1, 1);
4007 tcg_gen_shri_i32(d, a, sh);
4008 tcg_gen_add_i32(d, d, t);
4009 tcg_temp_free_i32(t);
4012 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4014 TCGv_i64 t = tcg_temp_new_i64();
4016 tcg_gen_extract_i64(t, a, sh - 1, 1);
4017 tcg_gen_shri_i64(d, a, sh);
4018 tcg_gen_add_i64(d, d, t);
4019 tcg_temp_free_i64(t);
4022 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
4024 TCGv_vec t = tcg_temp_new_vec_matching(d);
4025 TCGv_vec ones = tcg_temp_new_vec_matching(d);
4027 tcg_gen_shri_vec(vece, t, a, shift - 1);
4028 tcg_gen_dupi_vec(vece, ones, 1);
4029 tcg_gen_and_vec(vece, t, t, ones);
4030 tcg_gen_shri_vec(vece, d, a, shift);
4031 tcg_gen_add_vec(vece, d, d, t);
4033 tcg_temp_free_vec(t);
4034 tcg_temp_free_vec(ones);
4037 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4038 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4040 static const TCGOpcode vecop_list[] = {
4041 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4043 static const GVecGen2i ops[4] = {
4044 { .fni8 = gen_urshr8_i64,
4045 .fniv = gen_urshr_vec,
4046 .fno = gen_helper_gvec_urshr_b,
4047 .opt_opc = vecop_list,
4048 .vece = MO_8 },
4049 { .fni8 = gen_urshr16_i64,
4050 .fniv = gen_urshr_vec,
4051 .fno = gen_helper_gvec_urshr_h,
4052 .opt_opc = vecop_list,
4053 .vece = MO_16 },
4054 { .fni4 = gen_urshr32_i32,
4055 .fniv = gen_urshr_vec,
4056 .fno = gen_helper_gvec_urshr_s,
4057 .opt_opc = vecop_list,
4058 .vece = MO_32 },
4059 { .fni8 = gen_urshr64_i64,
4060 .fniv = gen_urshr_vec,
4061 .fno = gen_helper_gvec_urshr_d,
4062 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4063 .opt_opc = vecop_list,
4064 .vece = MO_64 },
4067 /* tszimm encoding produces immediates in the range [1..esize] */
4068 tcg_debug_assert(shift > 0);
4069 tcg_debug_assert(shift <= (8 << vece));
4071 if (shift == (8 << vece)) {
4073 * Shifts larger than the element size are architecturally valid.
4074 * Unsigned results in zero. With rounding, this produces a
4075 * copy of the most significant bit.
4077 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4078 } else {
4079 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4083 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4085 TCGv_i64 t = tcg_temp_new_i64();
4087 if (sh == 8) {
4088 tcg_gen_vec_shr8i_i64(t, a, 7);
4089 } else {
4090 gen_urshr8_i64(t, a, sh);
4092 tcg_gen_vec_add8_i64(d, d, t);
4093 tcg_temp_free_i64(t);
4096 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4098 TCGv_i64 t = tcg_temp_new_i64();
4100 if (sh == 16) {
4101 tcg_gen_vec_shr16i_i64(t, a, 15);
4102 } else {
4103 gen_urshr16_i64(t, a, sh);
4105 tcg_gen_vec_add16_i64(d, d, t);
4106 tcg_temp_free_i64(t);
4109 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4111 TCGv_i32 t = tcg_temp_new_i32();
4113 if (sh == 32) {
4114 tcg_gen_shri_i32(t, a, 31);
4115 } else {
4116 gen_urshr32_i32(t, a, sh);
4118 tcg_gen_add_i32(d, d, t);
4119 tcg_temp_free_i32(t);
4122 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4124 TCGv_i64 t = tcg_temp_new_i64();
4126 if (sh == 64) {
4127 tcg_gen_shri_i64(t, a, 63);
4128 } else {
4129 gen_urshr64_i64(t, a, sh);
4131 tcg_gen_add_i64(d, d, t);
4132 tcg_temp_free_i64(t);
4135 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4137 TCGv_vec t = tcg_temp_new_vec_matching(d);
4139 if (sh == (8 << vece)) {
4140 tcg_gen_shri_vec(vece, t, a, sh - 1);
4141 } else {
4142 gen_urshr_vec(vece, t, a, sh);
4144 tcg_gen_add_vec(vece, d, d, t);
4145 tcg_temp_free_vec(t);
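/*
 * For URSRA a shift of esize is not a no-op: with rounding the shifted
 * value is (a + (1 << (esize - 1))) >> esize, which is simply the most
 * significant bit of a, hence the "shift right by esize - 1" special
 * case in the helpers above.
 */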
4148 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4149 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4151 static const TCGOpcode vecop_list[] = {
4152 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4154 static const GVecGen2i ops[4] = {
4155 { .fni8 = gen_ursra8_i64,
4156 .fniv = gen_ursra_vec,
4157 .fno = gen_helper_gvec_ursra_b,
4158 .opt_opc = vecop_list,
4159 .load_dest = true,
4160 .vece = MO_8 },
4161 { .fni8 = gen_ursra16_i64,
4162 .fniv = gen_ursra_vec,
4163 .fno = gen_helper_gvec_ursra_h,
4164 .opt_opc = vecop_list,
4165 .load_dest = true,
4166 .vece = MO_16 },
4167 { .fni4 = gen_ursra32_i32,
4168 .fniv = gen_ursra_vec,
4169 .fno = gen_helper_gvec_ursra_s,
4170 .opt_opc = vecop_list,
4171 .load_dest = true,
4172 .vece = MO_32 },
4173 { .fni8 = gen_ursra64_i64,
4174 .fniv = gen_ursra_vec,
4175 .fno = gen_helper_gvec_ursra_d,
4176 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4177 .opt_opc = vecop_list,
4178 .load_dest = true,
4179 .vece = MO_64 },
4182 /* tszimm encoding produces immediates in the range [1..esize] */
4183 tcg_debug_assert(shift > 0);
4184 tcg_debug_assert(shift <= (8 << vece));
4186 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
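/*
 * The gen_shr*_ins_* helpers below implement SRI (shift right and
 * insert): each destination element keeps its top 'shift' bits and
 * takes the remaining esize - shift bits from the shifted source,
 *     d = (d & ~mask) | ((a >> shift) & mask),  mask = 2^(esize - shift) - 1
 * e.g. with esize == 8 and shift == 3, mask is 0x1f in every byte, so
 * the top three bits of each destination byte are preserved.
 */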
4189 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4191 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4192 TCGv_i64 t = tcg_temp_new_i64();
4194 tcg_gen_shri_i64(t, a, shift);
4195 tcg_gen_andi_i64(t, t, mask);
4196 tcg_gen_andi_i64(d, d, ~mask);
4197 tcg_gen_or_i64(d, d, t);
4198 tcg_temp_free_i64(t);
4201 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4203 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4204 TCGv_i64 t = tcg_temp_new_i64();
4206 tcg_gen_shri_i64(t, a, shift);
4207 tcg_gen_andi_i64(t, t, mask);
4208 tcg_gen_andi_i64(d, d, ~mask);
4209 tcg_gen_or_i64(d, d, t);
4210 tcg_temp_free_i64(t);
4213 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4215 tcg_gen_shri_i32(a, a, shift);
4216 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4219 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4221 tcg_gen_shri_i64(a, a, shift);
4222 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4225 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4227 TCGv_vec t = tcg_temp_new_vec_matching(d);
4228 TCGv_vec m = tcg_temp_new_vec_matching(d);
4230 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4231 tcg_gen_shri_vec(vece, t, a, sh);
4232 tcg_gen_and_vec(vece, d, d, m);
4233 tcg_gen_or_vec(vece, d, d, t);
4235 tcg_temp_free_vec(t);
4236 tcg_temp_free_vec(m);
4239 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4240 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4242 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4243 const GVecGen2i ops[4] = {
4244 { .fni8 = gen_shr8_ins_i64,
4245 .fniv = gen_shr_ins_vec,
4246 .fno = gen_helper_gvec_sri_b,
4247 .load_dest = true,
4248 .opt_opc = vecop_list,
4249 .vece = MO_8 },
4250 { .fni8 = gen_shr16_ins_i64,
4251 .fniv = gen_shr_ins_vec,
4252 .fno = gen_helper_gvec_sri_h,
4253 .load_dest = true,
4254 .opt_opc = vecop_list,
4255 .vece = MO_16 },
4256 { .fni4 = gen_shr32_ins_i32,
4257 .fniv = gen_shr_ins_vec,
4258 .fno = gen_helper_gvec_sri_s,
4259 .load_dest = true,
4260 .opt_opc = vecop_list,
4261 .vece = MO_32 },
4262 { .fni8 = gen_shr64_ins_i64,
4263 .fniv = gen_shr_ins_vec,
4264 .fno = gen_helper_gvec_sri_d,
4265 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4266 .load_dest = true,
4267 .opt_opc = vecop_list,
4268 .vece = MO_64 },
4271 /* tszimm encoding produces immediates in the range [1..esize]. */
4272 tcg_debug_assert(shift > 0);
4273 tcg_debug_assert(shift <= (8 << vece));
4275 /* Shift of esize leaves destination unchanged. */
4276 if (shift < (8 << vece)) {
4277 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4278 } else {
4279 /* Nop, but we do need to clear the tail. */
4280 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4284 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4286 uint64_t mask = dup_const(MO_8, 0xff << shift);
4287 TCGv_i64 t = tcg_temp_new_i64();
4289 tcg_gen_shli_i64(t, a, shift);
4290 tcg_gen_andi_i64(t, t, mask);
4291 tcg_gen_andi_i64(d, d, ~mask);
4292 tcg_gen_or_i64(d, d, t);
4293 tcg_temp_free_i64(t);
4296 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4298 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4299 TCGv_i64 t = tcg_temp_new_i64();
4301 tcg_gen_shli_i64(t, a, shift);
4302 tcg_gen_andi_i64(t, t, mask);
4303 tcg_gen_andi_i64(d, d, ~mask);
4304 tcg_gen_or_i64(d, d, t);
4305 tcg_temp_free_i64(t);
4308 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4310 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4313 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4315 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
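/*
 * For 32-bit and 64-bit elements SLI is a straight deposit: insert the
 * low esize - shift bits of a into d at bit position 'shift', keeping
 * d's low 'shift' bits.  The byte and halfword variants above build the
 * same result with an explicit mask (element ones << shift), as does
 * the vector variant below.
 */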
4318 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4320 TCGv_vec t = tcg_temp_new_vec_matching(d);
4321 TCGv_vec m = tcg_temp_new_vec_matching(d);
4323 tcg_gen_shli_vec(vece, t, a, sh);
4324 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4325 tcg_gen_and_vec(vece, d, d, m);
4326 tcg_gen_or_vec(vece, d, d, t);
4328 tcg_temp_free_vec(t);
4329 tcg_temp_free_vec(m);
4332 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4333 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4335 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4336 const GVecGen2i ops[4] = {
4337 { .fni8 = gen_shl8_ins_i64,
4338 .fniv = gen_shl_ins_vec,
4339 .fno = gen_helper_gvec_sli_b,
4340 .load_dest = true,
4341 .opt_opc = vecop_list,
4342 .vece = MO_8 },
4343 { .fni8 = gen_shl16_ins_i64,
4344 .fniv = gen_shl_ins_vec,
4345 .fno = gen_helper_gvec_sli_h,
4346 .load_dest = true,
4347 .opt_opc = vecop_list,
4348 .vece = MO_16 },
4349 { .fni4 = gen_shl32_ins_i32,
4350 .fniv = gen_shl_ins_vec,
4351 .fno = gen_helper_gvec_sli_s,
4352 .load_dest = true,
4353 .opt_opc = vecop_list,
4354 .vece = MO_32 },
4355 { .fni8 = gen_shl64_ins_i64,
4356 .fniv = gen_shl_ins_vec,
4357 .fno = gen_helper_gvec_sli_d,
4358 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4359 .load_dest = true,
4360 .opt_opc = vecop_list,
4361 .vece = MO_64 },
4364 /* tszimm encoding produces immediates in the range [0..esize-1]. */
4365 tcg_debug_assert(shift >= 0);
4366 tcg_debug_assert(shift < (8 << vece));
4368 if (shift == 0) {
4369 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4370 } else {
4371 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4375 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4377 gen_helper_neon_mul_u8(a, a, b);
4378 gen_helper_neon_add_u8(d, d, a);
4381 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4383 gen_helper_neon_mul_u8(a, a, b);
4384 gen_helper_neon_sub_u8(d, d, a);
4387 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4389 gen_helper_neon_mul_u16(a, a, b);
4390 gen_helper_neon_add_u16(d, d, a);
4393 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4395 gen_helper_neon_mul_u16(a, a, b);
4396 gen_helper_neon_sub_u16(d, d, a);
4399 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4401 tcg_gen_mul_i32(a, a, b);
4402 tcg_gen_add_i32(d, d, a);
4405 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4407 tcg_gen_mul_i32(a, a, b);
4408 tcg_gen_sub_i32(d, d, a);
4411 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4413 tcg_gen_mul_i64(a, a, b);
4414 tcg_gen_add_i64(d, d, a);
4417 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4419 tcg_gen_mul_i64(a, a, b);
4420 tcg_gen_sub_i64(d, d, a);
4423 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4425 tcg_gen_mul_vec(vece, a, a, b);
4426 tcg_gen_add_vec(vece, d, d, a);
4429 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4431 tcg_gen_mul_vec(vece, a, a, b);
4432 tcg_gen_sub_vec(vece, d, d, a);
4435 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4436 * these tables are shared with AArch64 which does support them.
4438 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4439 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4441 static const TCGOpcode vecop_list[] = {
4442 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4444 static const GVecGen3 ops[4] = {
4445 { .fni4 = gen_mla8_i32,
4446 .fniv = gen_mla_vec,
4447 .load_dest = true,
4448 .opt_opc = vecop_list,
4449 .vece = MO_8 },
4450 { .fni4 = gen_mla16_i32,
4451 .fniv = gen_mla_vec,
4452 .load_dest = true,
4453 .opt_opc = vecop_list,
4454 .vece = MO_16 },
4455 { .fni4 = gen_mla32_i32,
4456 .fniv = gen_mla_vec,
4457 .load_dest = true,
4458 .opt_opc = vecop_list,
4459 .vece = MO_32 },
4460 { .fni8 = gen_mla64_i64,
4461 .fniv = gen_mla_vec,
4462 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4463 .load_dest = true,
4464 .opt_opc = vecop_list,
4465 .vece = MO_64 },
4467 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4470 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4471 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4473 static const TCGOpcode vecop_list[] = {
4474 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4476 static const GVecGen3 ops[4] = {
4477 { .fni4 = gen_mls8_i32,
4478 .fniv = gen_mls_vec,
4479 .load_dest = true,
4480 .opt_opc = vecop_list,
4481 .vece = MO_8 },
4482 { .fni4 = gen_mls16_i32,
4483 .fniv = gen_mls_vec,
4484 .load_dest = true,
4485 .opt_opc = vecop_list,
4486 .vece = MO_16 },
4487 { .fni4 = gen_mls32_i32,
4488 .fniv = gen_mls_vec,
4489 .load_dest = true,
4490 .opt_opc = vecop_list,
4491 .vece = MO_32 },
4492 { .fni8 = gen_mls64_i64,
4493 .fniv = gen_mls_vec,
4494 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4495 .load_dest = true,
4496 .opt_opc = vecop_list,
4497 .vece = MO_64 },
4499 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4502 /* CMTST : test is "if ((X & Y) != 0)". */
4503 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4505 tcg_gen_and_i32(d, a, b);
4506 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4507 tcg_gen_neg_i32(d, d);
4510 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4512 tcg_gen_and_i64(d, a, b);
4513 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4514 tcg_gen_neg_i64(d, d);
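/*
 * i.e. CMTST sets an element to all ones when the two inputs share at
 * least one set bit, and to zero otherwise.  The integer versions above
 * obtain the all-ones pattern by computing setcond (0 or 1) and then
 * negating it; the vector version below lets cmp_vec produce the -1/0
 * mask directly.
 */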
4517 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4519 tcg_gen_and_vec(vece, d, a, b);
4520 tcg_gen_dupi_vec(vece, a, 0);
4521 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4524 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4525 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4527 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4528 static const GVecGen3 ops[4] = {
4529 { .fni4 = gen_helper_neon_tst_u8,
4530 .fniv = gen_cmtst_vec,
4531 .opt_opc = vecop_list,
4532 .vece = MO_8 },
4533 { .fni4 = gen_helper_neon_tst_u16,
4534 .fniv = gen_cmtst_vec,
4535 .opt_opc = vecop_list,
4536 .vece = MO_16 },
4537 { .fni4 = gen_cmtst_i32,
4538 .fniv = gen_cmtst_vec,
4539 .opt_opc = vecop_list,
4540 .vece = MO_32 },
4541 { .fni8 = gen_cmtst_i64,
4542 .fniv = gen_cmtst_vec,
4543 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4544 .opt_opc = vecop_list,
4545 .vece = MO_64 },
4547 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4550 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4552 TCGv_i32 lval = tcg_temp_new_i32();
4553 TCGv_i32 rval = tcg_temp_new_i32();
4554 TCGv_i32 lsh = tcg_temp_new_i32();
4555 TCGv_i32 rsh = tcg_temp_new_i32();
4556 TCGv_i32 zero = tcg_const_i32(0);
4557 TCGv_i32 max = tcg_const_i32(32);
4560 * Rely on the TCG guarantee that out of range shifts produce
4561 * unspecified results, not undefined behaviour (i.e. no trap).
4562 * Discard out-of-range results after the fact.
4564 tcg_gen_ext8s_i32(lsh, shift);
4565 tcg_gen_neg_i32(rsh, lsh);
4566 tcg_gen_shl_i32(lval, src, lsh);
4567 tcg_gen_shr_i32(rval, src, rsh);
4568 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4569 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4571 tcg_temp_free_i32(lval);
4572 tcg_temp_free_i32(rval);
4573 tcg_temp_free_i32(lsh);
4574 tcg_temp_free_i32(rsh);
4575 tcg_temp_free_i32(zero);
4576 tcg_temp_free_i32(max);
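/*
 * Scalar reference for USHL (illustrative only, esize == 32): the shift
 * count is the signed low byte of the shift operand; positive counts
 * shift left, negative counts shift right, and any count whose
 * magnitude is >= esize yields zero:
 *
 *     int8_t sh = (int8_t)shift;
 *     dst = (sh <= -32 || sh >= 32) ? 0
 *         : (sh >= 0) ? src << sh : src >> -sh;
 *
 * The movcond sequence above computes the same result branchlessly:
 * both shifted candidates are produced and the out-of-range ones are
 * discarded afterwards.
 */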
4579 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4581 TCGv_i64 lval = tcg_temp_new_i64();
4582 TCGv_i64 rval = tcg_temp_new_i64();
4583 TCGv_i64 lsh = tcg_temp_new_i64();
4584 TCGv_i64 rsh = tcg_temp_new_i64();
4585 TCGv_i64 zero = tcg_const_i64(0);
4586 TCGv_i64 max = tcg_const_i64(64);
4589 * Rely on the TCG guarantee that out of range shifts produce
4590 * unspecified results, not undefined behaviour (i.e. no trap).
4591 * Discard out-of-range results after the fact.
4593 tcg_gen_ext8s_i64(lsh, shift);
4594 tcg_gen_neg_i64(rsh, lsh);
4595 tcg_gen_shl_i64(lval, src, lsh);
4596 tcg_gen_shr_i64(rval, src, rsh);
4597 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4598 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4600 tcg_temp_free_i64(lval);
4601 tcg_temp_free_i64(rval);
4602 tcg_temp_free_i64(lsh);
4603 tcg_temp_free_i64(rsh);
4604 tcg_temp_free_i64(zero);
4605 tcg_temp_free_i64(max);
4608 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4609 TCGv_vec src, TCGv_vec shift)
4611 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4612 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4613 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4614 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4615 TCGv_vec msk, max;
4617 tcg_gen_neg_vec(vece, rsh, shift);
4618 if (vece == MO_8) {
4619 tcg_gen_mov_vec(lsh, shift);
4620 } else {
4621 msk = tcg_temp_new_vec_matching(dst);
4622 tcg_gen_dupi_vec(vece, msk, 0xff);
4623 tcg_gen_and_vec(vece, lsh, shift, msk);
4624 tcg_gen_and_vec(vece, rsh, rsh, msk);
4625 tcg_temp_free_vec(msk);
4629 * Rely on the TCG guarantee that out of range shifts produce
4630 * unspecified results, not undefined behaviour (i.e. no trap).
4631 * Discard out-of-range results after the fact.
4633 tcg_gen_shlv_vec(vece, lval, src, lsh);
4634 tcg_gen_shrv_vec(vece, rval, src, rsh);
4636 max = tcg_temp_new_vec_matching(dst);
4637 tcg_gen_dupi_vec(vece, max, 8 << vece);
4640 * The choice of LT (signed) and GEU (unsigned) is biased toward
4641 * the instructions of the x86_64 host. For MO_8, the whole byte
4642 * is significant so we must use an unsigned compare; otherwise we
4643 * have already masked to a byte and so a signed compare works.
4644 * Other tcg hosts have a full set of comparisons and do not care.
4646 if (vece == MO_8) {
4647 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4648 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4649 tcg_gen_andc_vec(vece, lval, lval, lsh);
4650 tcg_gen_andc_vec(vece, rval, rval, rsh);
4651 } else {
4652 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4653 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4654 tcg_gen_and_vec(vece, lval, lval, lsh);
4655 tcg_gen_and_vec(vece, rval, rval, rsh);
4657 tcg_gen_or_vec(vece, dst, lval, rval);
4659 tcg_temp_free_vec(max);
4660 tcg_temp_free_vec(lval);
4661 tcg_temp_free_vec(rval);
4662 tcg_temp_free_vec(lsh);
4663 tcg_temp_free_vec(rsh);
4666 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4667 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4669 static const TCGOpcode vecop_list[] = {
4670 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4671 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4673 static const GVecGen3 ops[4] = {
4674 { .fniv = gen_ushl_vec,
4675 .fno = gen_helper_gvec_ushl_b,
4676 .opt_opc = vecop_list,
4677 .vece = MO_8 },
4678 { .fniv = gen_ushl_vec,
4679 .fno = gen_helper_gvec_ushl_h,
4680 .opt_opc = vecop_list,
4681 .vece = MO_16 },
4682 { .fni4 = gen_ushl_i32,
4683 .fniv = gen_ushl_vec,
4684 .opt_opc = vecop_list,
4685 .vece = MO_32 },
4686 { .fni8 = gen_ushl_i64,
4687 .fniv = gen_ushl_vec,
4688 .opt_opc = vecop_list,
4689 .vece = MO_64 },
4691 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4694 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4696 TCGv_i32 lval = tcg_temp_new_i32();
4697 TCGv_i32 rval = tcg_temp_new_i32();
4698 TCGv_i32 lsh = tcg_temp_new_i32();
4699 TCGv_i32 rsh = tcg_temp_new_i32();
4700 TCGv_i32 zero = tcg_const_i32(0);
4701 TCGv_i32 max = tcg_const_i32(31);
4704 * Rely on the TCG guarantee that out of range shifts produce
4705 * unspecified results, not undefined behaviour (i.e. no trap).
4706 * Discard out-of-range results after the fact.
4708 tcg_gen_ext8s_i32(lsh, shift);
4709 tcg_gen_neg_i32(rsh, lsh);
4710 tcg_gen_shl_i32(lval, src, lsh);
4711 tcg_gen_umin_i32(rsh, rsh, max);
4712 tcg_gen_sar_i32(rval, src, rsh);
4713 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4714 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4716 tcg_temp_free_i32(lval);
4717 tcg_temp_free_i32(rval);
4718 tcg_temp_free_i32(lsh);
4719 tcg_temp_free_i32(rsh);
4720 tcg_temp_free_i32(zero);
4721 tcg_temp_free_i32(max);
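/*
 * For SSHL the out-of-range cases differ from USHL: an arithmetic right
 * shift by esize or more gives the same result as a shift by esize - 1
 * (all copies of the sign bit), so rsh is clamped with umin rather than
 * discarded; only an out-of-range left shift is forced to zero.
 */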
4724 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4726 TCGv_i64 lval = tcg_temp_new_i64();
4727 TCGv_i64 rval = tcg_temp_new_i64();
4728 TCGv_i64 lsh = tcg_temp_new_i64();
4729 TCGv_i64 rsh = tcg_temp_new_i64();
4730 TCGv_i64 zero = tcg_const_i64(0);
4731 TCGv_i64 max = tcg_const_i64(63);
4734 * Rely on the TCG guarantee that out of range shifts produce
4735 * unspecified results, not undefined behaviour (i.e. no trap).
4736 * Discard out-of-range results after the fact.
4738 tcg_gen_ext8s_i64(lsh, shift);
4739 tcg_gen_neg_i64(rsh, lsh);
4740 tcg_gen_shl_i64(lval, src, lsh);
4741 tcg_gen_umin_i64(rsh, rsh, max);
4742 tcg_gen_sar_i64(rval, src, rsh);
4743 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4744 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4746 tcg_temp_free_i64(lval);
4747 tcg_temp_free_i64(rval);
4748 tcg_temp_free_i64(lsh);
4749 tcg_temp_free_i64(rsh);
4750 tcg_temp_free_i64(zero);
4751 tcg_temp_free_i64(max);
4754 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4755 TCGv_vec src, TCGv_vec shift)
4757 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4758 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4759 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4760 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4761 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4764 * Rely on the TCG guarantee that out of range shifts produce
4765 * unspecified results, not undefined behaviour (i.e. no trap).
4766 * Discard out-of-range results after the fact.
4768 tcg_gen_neg_vec(vece, rsh, shift);
4769 if (vece == MO_8) {
4770 tcg_gen_mov_vec(lsh, shift);
4771 } else {
4772 tcg_gen_dupi_vec(vece, tmp, 0xff);
4773 tcg_gen_and_vec(vece, lsh, shift, tmp);
4774 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4777 /* Bound rsh so that an out-of-range right shift produces all sign bits. */
4778 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4779 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4780 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4782 tcg_gen_shlv_vec(vece, lval, src, lsh);
4783 tcg_gen_sarv_vec(vece, rval, src, rsh);
4785 /* Select in-bound left shift. */
4786 tcg_gen_andc_vec(vece, lval, lval, tmp);
4788 /* Select between left and right shift. */
4789 if (vece == MO_8) {
4790 tcg_gen_dupi_vec(vece, tmp, 0);
4791 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4792 } else {
4793 tcg_gen_dupi_vec(vece, tmp, 0x80);
4794 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4797 tcg_temp_free_vec(lval);
4798 tcg_temp_free_vec(rval);
4799 tcg_temp_free_vec(lsh);
4800 tcg_temp_free_vec(rsh);
4801 tcg_temp_free_vec(tmp);
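/*
 * The final cmpsel selects between the left-shift and right-shift
 * results according to the sign of the shift count.  For MO_8 the count
 * occupies the whole element, so it is compared against zero; for wider
 * elements the count has already been masked to the low byte, so
 * "negative" means bit 7 set, hence the comparison against 0x80 with
 * the two result operands in the opposite order.
 */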
4804 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4805 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4807 static const TCGOpcode vecop_list[] = {
4808 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4809 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4811 static const GVecGen3 ops[4] = {
4812 { .fniv = gen_sshl_vec,
4813 .fno = gen_helper_gvec_sshl_b,
4814 .opt_opc = vecop_list,
4815 .vece = MO_8 },
4816 { .fniv = gen_sshl_vec,
4817 .fno = gen_helper_gvec_sshl_h,
4818 .opt_opc = vecop_list,
4819 .vece = MO_16 },
4820 { .fni4 = gen_sshl_i32,
4821 .fniv = gen_sshl_vec,
4822 .opt_opc = vecop_list,
4823 .vece = MO_32 },
4824 { .fni8 = gen_sshl_i64,
4825 .fniv = gen_sshl_vec,
4826 .opt_opc = vecop_list,
4827 .vece = MO_64 },
4829 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4832 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4833 TCGv_vec a, TCGv_vec b)
4835 TCGv_vec x = tcg_temp_new_vec_matching(t);
4836 tcg_gen_add_vec(vece, x, a, b);
4837 tcg_gen_usadd_vec(vece, t, a, b);
4838 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4839 tcg_gen_or_vec(vece, sat, sat, x);
4840 tcg_temp_free_vec(x);
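/*
 * Saturation is detected by computing the operation twice: a plain
 * wrapping add and a saturating add.  Any lane where the two results
 * differ has saturated, the NE comparison produces an all-ones mask for
 * such lanes, and that mask is ORed into 'sat', the QC accumulation
 * vector (vfp.qc, passed as the second offset to tcg_gen_gvec_4 below).
 */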
4843 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4844 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4846 static const TCGOpcode vecop_list[] = {
4847 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4849 static const GVecGen4 ops[4] = {
4850 { .fniv = gen_uqadd_vec,
4851 .fno = gen_helper_gvec_uqadd_b,
4852 .write_aofs = true,
4853 .opt_opc = vecop_list,
4854 .vece = MO_8 },
4855 { .fniv = gen_uqadd_vec,
4856 .fno = gen_helper_gvec_uqadd_h,
4857 .write_aofs = true,
4858 .opt_opc = vecop_list,
4859 .vece = MO_16 },
4860 { .fniv = gen_uqadd_vec,
4861 .fno = gen_helper_gvec_uqadd_s,
4862 .write_aofs = true,
4863 .opt_opc = vecop_list,
4864 .vece = MO_32 },
4865 { .fniv = gen_uqadd_vec,
4866 .fno = gen_helper_gvec_uqadd_d,
4867 .write_aofs = true,
4868 .opt_opc = vecop_list,
4869 .vece = MO_64 },
4871 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4872 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4875 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4876 TCGv_vec a, TCGv_vec b)
4878 TCGv_vec x = tcg_temp_new_vec_matching(t);
4879 tcg_gen_add_vec(vece, x, a, b);
4880 tcg_gen_ssadd_vec(vece, t, a, b);
4881 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4882 tcg_gen_or_vec(vece, sat, sat, x);
4883 tcg_temp_free_vec(x);
4886 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4887 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4889 static const TCGOpcode vecop_list[] = {
4890 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4892 static const GVecGen4 ops[4] = {
4893 { .fniv = gen_sqadd_vec,
4894 .fno = gen_helper_gvec_sqadd_b,
4895 .opt_opc = vecop_list,
4896 .write_aofs = true,
4897 .vece = MO_8 },
4898 { .fniv = gen_sqadd_vec,
4899 .fno = gen_helper_gvec_sqadd_h,
4900 .opt_opc = vecop_list,
4901 .write_aofs = true,
4902 .vece = MO_16 },
4903 { .fniv = gen_sqadd_vec,
4904 .fno = gen_helper_gvec_sqadd_s,
4905 .opt_opc = vecop_list,
4906 .write_aofs = true,
4907 .vece = MO_32 },
4908 { .fniv = gen_sqadd_vec,
4909 .fno = gen_helper_gvec_sqadd_d,
4910 .opt_opc = vecop_list,
4911 .write_aofs = true,
4912 .vece = MO_64 },
4914 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4915 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4918 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4919 TCGv_vec a, TCGv_vec b)
4921 TCGv_vec x = tcg_temp_new_vec_matching(t);
4922 tcg_gen_sub_vec(vece, x, a, b);
4923 tcg_gen_ussub_vec(vece, t, a, b);
4924 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4925 tcg_gen_or_vec(vece, sat, sat, x);
4926 tcg_temp_free_vec(x);
4929 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4930 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4932 static const TCGOpcode vecop_list[] = {
4933 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4935 static const GVecGen4 ops[4] = {
4936 { .fniv = gen_uqsub_vec,
4937 .fno = gen_helper_gvec_uqsub_b,
4938 .opt_opc = vecop_list,
4939 .write_aofs = true,
4940 .vece = MO_8 },
4941 { .fniv = gen_uqsub_vec,
4942 .fno = gen_helper_gvec_uqsub_h,
4943 .opt_opc = vecop_list,
4944 .write_aofs = true,
4945 .vece = MO_16 },
4946 { .fniv = gen_uqsub_vec,
4947 .fno = gen_helper_gvec_uqsub_s,
4948 .opt_opc = vecop_list,
4949 .write_aofs = true,
4950 .vece = MO_32 },
4951 { .fniv = gen_uqsub_vec,
4952 .fno = gen_helper_gvec_uqsub_d,
4953 .opt_opc = vecop_list,
4954 .write_aofs = true,
4955 .vece = MO_64 },
4957 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4958 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4961 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4962 TCGv_vec a, TCGv_vec b)
4964 TCGv_vec x = tcg_temp_new_vec_matching(t);
4965 tcg_gen_sub_vec(vece, x, a, b);
4966 tcg_gen_sssub_vec(vece, t, a, b);
4967 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4968 tcg_gen_or_vec(vece, sat, sat, x);
4969 tcg_temp_free_vec(x);
4972 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4973 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4975 static const TCGOpcode vecop_list[] = {
4976 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4978 static const GVecGen4 ops[4] = {
4979 { .fniv = gen_sqsub_vec,
4980 .fno = gen_helper_gvec_sqsub_b,
4981 .opt_opc = vecop_list,
4982 .write_aofs = true,
4983 .vece = MO_8 },
4984 { .fniv = gen_sqsub_vec,
4985 .fno = gen_helper_gvec_sqsub_h,
4986 .opt_opc = vecop_list,
4987 .write_aofs = true,
4988 .vece = MO_16 },
4989 { .fniv = gen_sqsub_vec,
4990 .fno = gen_helper_gvec_sqsub_s,
4991 .opt_opc = vecop_list,
4992 .write_aofs = true,
4993 .vece = MO_32 },
4994 { .fniv = gen_sqsub_vec,
4995 .fno = gen_helper_gvec_sqsub_d,
4996 .opt_opc = vecop_list,
4997 .write_aofs = true,
4998 .vece = MO_64 },
5000 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
5001 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5004 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5006 TCGv_i32 t = tcg_temp_new_i32();
5008 tcg_gen_sub_i32(t, a, b);
5009 tcg_gen_sub_i32(d, b, a);
5010 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
5011 tcg_temp_free_i32(t);
5014 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5016 TCGv_i64 t = tcg_temp_new_i64();
5018 tcg_gen_sub_i64(t, a, b);
5019 tcg_gen_sub_i64(d, b, a);
5020 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
5021 tcg_temp_free_i64(t);
5024 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5026 TCGv_vec t = tcg_temp_new_vec_matching(d);
5028 tcg_gen_smin_vec(vece, t, a, b);
5029 tcg_gen_smax_vec(vece, d, a, b);
5030 tcg_gen_sub_vec(vece, d, d, t);
5031 tcg_temp_free_vec(t);
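/*
 * The vector form uses the identity |a - b| = max(a, b) - min(a, b);
 * the scalar forms above compute both a - b and b - a and use movcond
 * to keep whichever is non-negative.
 */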
5034 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5035 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5037 static const TCGOpcode vecop_list[] = {
5038 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5040 static const GVecGen3 ops[4] = {
5041 { .fniv = gen_sabd_vec,
5042 .fno = gen_helper_gvec_sabd_b,
5043 .opt_opc = vecop_list,
5044 .vece = MO_8 },
5045 { .fniv = gen_sabd_vec,
5046 .fno = gen_helper_gvec_sabd_h,
5047 .opt_opc = vecop_list,
5048 .vece = MO_16 },
5049 { .fni4 = gen_sabd_i32,
5050 .fniv = gen_sabd_vec,
5051 .fno = gen_helper_gvec_sabd_s,
5052 .opt_opc = vecop_list,
5053 .vece = MO_32 },
5054 { .fni8 = gen_sabd_i64,
5055 .fniv = gen_sabd_vec,
5056 .fno = gen_helper_gvec_sabd_d,
5057 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5058 .opt_opc = vecop_list,
5059 .vece = MO_64 },
5061 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5064 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5066 TCGv_i32 t = tcg_temp_new_i32();
5068 tcg_gen_sub_i32(t, a, b);
5069 tcg_gen_sub_i32(d, b, a);
5070 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
5071 tcg_temp_free_i32(t);
5074 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5076 TCGv_i64 t = tcg_temp_new_i64();
5078 tcg_gen_sub_i64(t, a, b);
5079 tcg_gen_sub_i64(d, b, a);
5080 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
5081 tcg_temp_free_i64(t);
5084 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5086 TCGv_vec t = tcg_temp_new_vec_matching(d);
5088 tcg_gen_umin_vec(vece, t, a, b);
5089 tcg_gen_umax_vec(vece, d, a, b);
5090 tcg_gen_sub_vec(vece, d, d, t);
5091 tcg_temp_free_vec(t);
5094 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5095 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5097 static const TCGOpcode vecop_list[] = {
5098 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5100 static const GVecGen3 ops[4] = {
5101 { .fniv = gen_uabd_vec,
5102 .fno = gen_helper_gvec_uabd_b,
5103 .opt_opc = vecop_list,
5104 .vece = MO_8 },
5105 { .fniv = gen_uabd_vec,
5106 .fno = gen_helper_gvec_uabd_h,
5107 .opt_opc = vecop_list,
5108 .vece = MO_16 },
5109 { .fni4 = gen_uabd_i32,
5110 .fniv = gen_uabd_vec,
5111 .fno = gen_helper_gvec_uabd_s,
5112 .opt_opc = vecop_list,
5113 .vece = MO_32 },
5114 { .fni8 = gen_uabd_i64,
5115 .fniv = gen_uabd_vec,
5116 .fno = gen_helper_gvec_uabd_d,
5117 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5118 .opt_opc = vecop_list,
5119 .vece = MO_64 },
5121 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5124 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5126 TCGv_i32 t = tcg_temp_new_i32();
5127 gen_sabd_i32(t, a, b);
5128 tcg_gen_add_i32(d, d, t);
5129 tcg_temp_free_i32(t);
5132 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5134 TCGv_i64 t = tcg_temp_new_i64();
5135 gen_sabd_i64(t, a, b);
5136 tcg_gen_add_i64(d, d, t);
5137 tcg_temp_free_i64(t);
5140 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5142 TCGv_vec t = tcg_temp_new_vec_matching(d);
5143 gen_sabd_vec(vece, t, a, b);
5144 tcg_gen_add_vec(vece, d, d, t);
5145 tcg_temp_free_vec(t);
5148 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5149 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5151 static const TCGOpcode vecop_list[] = {
5152 INDEX_op_sub_vec, INDEX_op_add_vec,
5153 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5155 static const GVecGen3 ops[4] = {
5156 { .fniv = gen_saba_vec,
5157 .fno = gen_helper_gvec_saba_b,
5158 .opt_opc = vecop_list,
5159 .load_dest = true,
5160 .vece = MO_8 },
5161 { .fniv = gen_saba_vec,
5162 .fno = gen_helper_gvec_saba_h,
5163 .opt_opc = vecop_list,
5164 .load_dest = true,
5165 .vece = MO_16 },
5166 { .fni4 = gen_saba_i32,
5167 .fniv = gen_saba_vec,
5168 .fno = gen_helper_gvec_saba_s,
5169 .opt_opc = vecop_list,
5170 .load_dest = true,
5171 .vece = MO_32 },
5172 { .fni8 = gen_saba_i64,
5173 .fniv = gen_saba_vec,
5174 .fno = gen_helper_gvec_saba_d,
5175 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5176 .opt_opc = vecop_list,
5177 .load_dest = true,
5178 .vece = MO_64 },
5180 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5183 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5185 TCGv_i32 t = tcg_temp_new_i32();
5186 gen_uabd_i32(t, a, b);
5187 tcg_gen_add_i32(d, d, t);
5188 tcg_temp_free_i32(t);
5191 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5193 TCGv_i64 t = tcg_temp_new_i64();
5194 gen_uabd_i64(t, a, b);
5195 tcg_gen_add_i64(d, d, t);
5196 tcg_temp_free_i64(t);
5199 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5201 TCGv_vec t = tcg_temp_new_vec_matching(d);
5202 gen_uabd_vec(vece, t, a, b);
5203 tcg_gen_add_vec(vece, d, d, t);
5204 tcg_temp_free_vec(t);
5207 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5208 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5210 static const TCGOpcode vecop_list[] = {
5211 INDEX_op_sub_vec, INDEX_op_add_vec,
5212 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5214 static const GVecGen3 ops[4] = {
5215 { .fniv = gen_uaba_vec,
5216 .fno = gen_helper_gvec_uaba_b,
5217 .opt_opc = vecop_list,
5218 .load_dest = true,
5219 .vece = MO_8 },
5220 { .fniv = gen_uaba_vec,
5221 .fno = gen_helper_gvec_uaba_h,
5222 .opt_opc = vecop_list,
5223 .load_dest = true,
5224 .vece = MO_16 },
5225 { .fni4 = gen_uaba_i32,
5226 .fniv = gen_uaba_vec,
5227 .fno = gen_helper_gvec_uaba_s,
5228 .opt_opc = vecop_list,
5229 .load_dest = true,
5230 .vece = MO_32 },
5231 { .fni8 = gen_uaba_i64,
5232 .fniv = gen_uaba_vec,
5233 .fno = gen_helper_gvec_uaba_d,
5234 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5235 .opt_opc = vecop_list,
5236 .load_dest = true,
5237 .vece = MO_64 },
5239 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5242 /* Translate a NEON data processing instruction. Return nonzero if the
5243 instruction is invalid.
5244 We process data in a mixture of 32-bit and 64-bit chunks.
5245 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
5247 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5249 int op;
5250 int q;
5251 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5252 int size;
5253 int shift;
5254 int pass;
5255 int count;
5256 int u;
5257 int vec_size;
5258 uint32_t imm;
5259 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5260 TCGv_ptr ptr1, ptr2;
5261 TCGv_i64 tmp64;
5263 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5264 return 1;
5267 /* FIXME: this access check should not take precedence over UNDEF
5268 * for invalid encodings; we will generate incorrect syndrome information
5269 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5271 if (s->fp_excp_el) {
5272 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5273 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5274 return 0;
5277 if (!s->vfp_enabled)
5278 return 1;
5279 q = (insn & (1 << 6)) != 0;
5280 u = (insn >> 24) & 1;
5281 VFP_DREG_D(rd, insn);
5282 VFP_DREG_N(rn, insn);
5283 VFP_DREG_M(rm, insn);
5284 size = (insn >> 20) & 3;
5285 vec_size = q ? 16 : 8;
5286 rd_ofs = neon_reg_offset(rd, 0);
5287 rn_ofs = neon_reg_offset(rn, 0);
5288 rm_ofs = neon_reg_offset(rm, 0);
5290 if ((insn & (1 << 23)) == 0) {
5291 /* Three register same length: handled by decodetree */
5292 return 1;
5293 } else if (insn & (1 << 4)) {
5294 if ((insn & 0x00380080) != 0) {
5295 /* Two registers and shift. */
5296 op = (insn >> 8) & 0xf;
5297 if (insn & (1 << 7)) {
5298 /* 64-bit shift. */
5299 if (op > 7) {
5300 return 1;
5302 size = 3;
5303 } else {
5304 size = 2;
5305 while ((insn & (1 << (size + 19))) == 0)
5306 size--;
5308 shift = (insn >> 16) & ((1 << (3 + size)) - 1);
5309 if (op < 8) {
5310 /* Shift by immediate:
5311 VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. */
5312 if (q && ((rd | rm) & 1)) {
5313 return 1;
5315 if (!u && (op == 4 || op == 6)) {
5316 return 1;
5318 /* Right shifts are encoded as N - shift, where N is the
5319 element size in bits. */
5320 if (op <= 4) {
5321 shift = shift - (1 << (size + 3));
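/* For example, with size == 0 (8-bit elements) an immediate field
 * of 7 encodes a right shift of 8 - 7 == 1, so 'shift' becomes -1
 * here and is negated again in the cases below.
 */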
5324 switch (op) {
5325 case 0: /* VSHR */
5326 /* Right shift comes here negative. */
5327 shift = -shift;
5328 /* Shifts larger than the element size are architecturally
5329 * valid. Unsigned results in all zeros; signed results
5330 * in all sign bits.
5332 if (!u) {
5333 tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5334 MIN(shift, (8 << size) - 1),
5335 vec_size, vec_size);
5336 } else if (shift >= 8 << size) {
5337 tcg_gen_gvec_dup_imm(MO_8, rd_ofs, vec_size,
5338 vec_size, 0);
5339 } else {
5340 tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5341 vec_size, vec_size);
5343 return 0;
5345 case 1: /* VSRA */
5346 /* Right shift comes here negative. */
5347 shift = -shift;
5348 if (u) {
5349 gen_gvec_usra(size, rd_ofs, rm_ofs, shift,
5350 vec_size, vec_size);
5351 } else {
5352 gen_gvec_ssra(size, rd_ofs, rm_ofs, shift,
5353 vec_size, vec_size);
5355 return 0;
5357 case 2: /* VRSHR */
5358 /* Right shift comes here negative. */
5359 shift = -shift;
5360 if (u) {
5361 gen_gvec_urshr(size, rd_ofs, rm_ofs, shift,
5362 vec_size, vec_size);
5363 } else {
5364 gen_gvec_srshr(size, rd_ofs, rm_ofs, shift,
5365 vec_size, vec_size);
5367 return 0;
5369 case 3: /* VRSRA */
5370 /* Right shift comes here negative. */
5371 shift = -shift;
5372 if (u) {
5373 gen_gvec_ursra(size, rd_ofs, rm_ofs, shift,
5374 vec_size, vec_size);
5375 } else {
5376 gen_gvec_srsra(size, rd_ofs, rm_ofs, shift,
5377 vec_size, vec_size);
5379 return 0;
5381 case 4: /* VSRI */
5382 if (!u) {
5383 return 1;
5385 /* Right shift comes here negative. */
5386 shift = -shift;
5387 gen_gvec_sri(size, rd_ofs, rm_ofs, shift,
5388 vec_size, vec_size);
5389 return 0;
5391 case 5: /* VSHL, VSLI */
5392 if (u) { /* VSLI */
5393 gen_gvec_sli(size, rd_ofs, rm_ofs, shift,
5394 vec_size, vec_size);
5395 } else { /* VSHL */
5396 tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5397 vec_size, vec_size);
5399 return 0;
5402 if (size == 3) {
5403 count = q + 1;
5404 } else {
5405 count = q ? 4: 2;
5408 /* To avoid excessive duplication of ops we implement shift
5409 * by immediate using the variable shift operations.
5411 imm = dup_const(size, shift);
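/* e.g. for size == 0 (bytes) and shift == 3 this yields
 * imm == 0x03030303, so every byte lane of the variable-shift
 * helper sees the same left-shift count of 3.
 */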
5413 for (pass = 0; pass < count; pass++) {
5414 if (size == 3) {
5415 neon_load_reg64(cpu_V0, rm + pass);
5416 tcg_gen_movi_i64(cpu_V1, imm);
5417 switch (op) {
5418 case 6: /* VQSHLU */
5419 gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5420 cpu_V0, cpu_V1);
5421 break;
5422 case 7: /* VQSHL */
5423 if (u) {
5424 gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5425 cpu_V0, cpu_V1);
5426 } else {
5427 gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5428 cpu_V0, cpu_V1);
5430 break;
5431 default:
5432 g_assert_not_reached();
5434 neon_store_reg64(cpu_V0, rd + pass);
5435 } else { /* size < 3 */
5436 /* Operands in tmp and tmp2. */
5437 tmp = neon_load_reg(rm, pass);
5438 tmp2 = tcg_temp_new_i32();
5439 tcg_gen_movi_i32(tmp2, imm);
5440 switch (op) {
5441 case 6: /* VQSHLU */
5442 switch (size) {
5443 case 0:
5444 gen_helper_neon_qshlu_s8(tmp, cpu_env,
5445 tmp, tmp2);
5446 break;
5447 case 1:
5448 gen_helper_neon_qshlu_s16(tmp, cpu_env,
5449 tmp, tmp2);
5450 break;
5451 case 2:
5452 gen_helper_neon_qshlu_s32(tmp, cpu_env,
5453 tmp, tmp2);
5454 break;
5455 default:
5456 abort();
5458 break;
5459 case 7: /* VQSHL */
5460 GEN_NEON_INTEGER_OP_ENV(qshl);
5461 break;
5462 default:
5463 g_assert_not_reached();
5465 tcg_temp_free_i32(tmp2);
5466 neon_store_reg(rd, pass, tmp);
5468 } /* for pass */
5469 } else if (op < 10) {
5470 /* Shift by immediate and narrow:
5471 VSHRN, VRSHRN, VQSHRN, VQRSHRN. */
5472 int input_unsigned = (op == 8) ? !u : u;
5473 if (rm & 1) {
5474 return 1;
5476 shift = shift - (1 << (size + 3));
5477 size++;
5478 if (size == 3) {
5479 tmp64 = tcg_const_i64(shift);
5480 neon_load_reg64(cpu_V0, rm);
5481 neon_load_reg64(cpu_V1, rm + 1);
5482 for (pass = 0; pass < 2; pass++) {
5483 TCGv_i64 in;
5484 if (pass == 0) {
5485 in = cpu_V0;
5486 } else {
5487 in = cpu_V1;
5489 if (q) {
5490 if (input_unsigned) {
5491 gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5492 } else {
5493 gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5495 } else {
5496 if (input_unsigned) {
5497 gen_ushl_i64(cpu_V0, in, tmp64);
5498 } else {
5499 gen_sshl_i64(cpu_V0, in, tmp64);
5502 tmp = tcg_temp_new_i32();
5503 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5504 neon_store_reg(rd, pass, tmp);
5505 } /* for pass */
5506 tcg_temp_free_i64(tmp64);
5507 } else {
5508 if (size == 1) {
5509 imm = (uint16_t)shift;
5510 imm |= imm << 16;
5511 } else {
5512 /* size == 2 */
5513 imm = (uint32_t)shift;
5515 tmp2 = tcg_const_i32(imm);
5516 tmp4 = neon_load_reg(rm + 1, 0);
5517 tmp5 = neon_load_reg(rm + 1, 1);
5518 for (pass = 0; pass < 2; pass++) {
5519 if (pass == 0) {
5520 tmp = neon_load_reg(rm, 0);
5521 } else {
5522 tmp = tmp4;
5524 gen_neon_shift_narrow(size, tmp, tmp2, q,
5525 input_unsigned);
5526 if (pass == 0) {
5527 tmp3 = neon_load_reg(rm, 1);
5528 } else {
5529 tmp3 = tmp5;
5531 gen_neon_shift_narrow(size, tmp3, tmp2, q,
5532 input_unsigned);
5533 tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5534 tcg_temp_free_i32(tmp);
5535 tcg_temp_free_i32(tmp3);
5536 tmp = tcg_temp_new_i32();
5537 gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5538 neon_store_reg(rd, pass, tmp);
5539 } /* for pass */
5540 tcg_temp_free_i32(tmp2);
5542 } else if (op == 10) {
5543 /* VSHLL, VMOVL */
5544 if (q || (rd & 1)) {
5545 return 1;
5547 tmp = neon_load_reg(rm, 0);
5548 tmp2 = neon_load_reg(rm, 1);
5549 for (pass = 0; pass < 2; pass++) {
5550 if (pass == 1)
5551 tmp = tmp2;
5553 gen_neon_widen(cpu_V0, tmp, size, u);
5555 if (shift != 0) {
5556 /* The shift is less than the width of the source
5557 type, so we can just shift the whole register. */
5558 tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5559 /* Widen the result of the shift: because we shift the whole
5560 * widened register, we must clear the potential overflow bits
5561 * resulting from the high bits of one narrow input appearing
5562 * as the low bits of the neighbouring (more significant)
5563 * widened element. */
5564 if (size < 2 || !u) {
5565 uint64_t imm64;
5566 if (size == 0) {
5567 imm = (0xffu >> (8 - shift));
5568 imm |= imm << 16;
5569 } else if (size == 1) {
5570 imm = 0xffff >> (16 - shift);
5571 } else {
5572 /* size == 2 */
5573 imm = 0xffffffff >> (32 - shift);
5575 if (size < 2) {
5576 imm64 = imm | (((uint64_t)imm) << 32);
5577 } else {
5578 imm64 = imm;
5580 tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
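/* e.g. size == 0, signed input, shift == 3: imm64 is
 * 0x0007000700070007, so the low three bits of each widened
 * 16-bit lane (which would otherwise hold sign bits spilled
 * from the lane below) are cleared.
 */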
5583 neon_store_reg64(cpu_V0, rd + pass);
5585 } else if (op >= 14) {
5586 /* VCVT fixed-point. */
5587 TCGv_ptr fpst;
5588 TCGv_i32 shiftv;
5589 VFPGenFixPointFn *fn;
5591 if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5592 return 1;
5595 if (!(op & 1)) {
5596 if (u) {
5597 fn = gen_helper_vfp_ultos;
5598 } else {
5599 fn = gen_helper_vfp_sltos;
5601 } else {
5602 if (u) {
5603 fn = gen_helper_vfp_touls_round_to_zero;
5604 } else {
5605 fn = gen_helper_vfp_tosls_round_to_zero;
5609 /* We have already masked out the must-be-1 top bit of imm6,
5610 * hence this 32-shift where the ARM ARM has 64-imm6.
5612 shift = 32 - shift;
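/* e.g. imm6 == 0b110000 (48): the low five bits give shift == 16,
 * and fracbits == 32 - 16 == 16, matching the ARM ARM's 64 - imm6.
 */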
5613 fpst = get_fpstatus_ptr(1);
5614 shiftv = tcg_const_i32(shift);
5615 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5616 TCGv_i32 tmpf = neon_load_reg(rm, pass);
5617 fn(tmpf, tmpf, shiftv, fpst);
5618 neon_store_reg(rd, pass, tmpf);
5620 tcg_temp_free_ptr(fpst);
5621 tcg_temp_free_i32(shiftv);
5622 } else {
5623 return 1;
5625 } else { /* (insn & 0x00380080) == 0 */
5626 int invert, reg_ofs, vec_size;
5628 if (q && (rd & 1)) {
5629 return 1;
5632 op = (insn >> 8) & 0xf;
5633 /* One register and immediate. */
5634 imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5635 invert = (insn & (1 << 5)) != 0;
5636 /* Note that op = 2,3,4,5,6,7,10,11,12,13 with imm == 0 is UNPREDICTABLE.
5637 * We choose not to special-case this and behave as if a
5638 * valid constant encoding of 0 had been given.
5640 switch (op) {
5641 case 0: case 1:
5642 /* no-op */
5643 break;
5644 case 2: case 3:
5645 imm <<= 8;
5646 break;
5647 case 4: case 5:
5648 imm <<= 16;
5649 break;
5650 case 6: case 7:
5651 imm <<= 24;
5652 break;
5653 case 8: case 9:
5654 imm |= imm << 16;
5655 break;
5656 case 10: case 11:
5657 imm = (imm << 8) | (imm << 24);
5658 break;
5659 case 12:
5660 imm = (imm << 8) | 0xff;
5661 break;
5662 case 13:
5663 imm = (imm << 16) | 0xffff;
5664 break;
5665 case 14:
5666 imm |= (imm << 8) | (imm << 16) | (imm << 24);
5667 if (invert) {
5668 imm = ~imm;
5670 break;
5671 case 15:
5672 if (invert) {
5673 return 1;
5675 imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5676 | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5677 break;
5679 if (invert) {
5680 imm = ~imm;
5683 reg_ofs = neon_reg_offset(rd, 0);
5684 vec_size = q ? 16 : 8;
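/* A worked example of the expansion above: op == 12 with an 8-bit
 * immediate of 0xab gives imm == 0x0000abff (the "byte shifted left
 * by 8 with ones shifted in" pattern); since op 12 is even this is a
 * VMOV/VMVN form, and the (possibly already inverted, for VMVN)
 * value is replicated to every 32-bit lane by tcg_gen_gvec_dup_imm
 * below.
 */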
5686 if (op & 1 && op < 12) {
5687 if (invert) {
5688 /* The immediate value has already been inverted,
5689 * so BIC becomes AND.
5691 tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5692 vec_size, vec_size);
5693 } else {
5694 tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5695 vec_size, vec_size);
5697 } else {
5698 /* VMOV, VMVN. */
5699 if (op == 14 && invert) {
5700 TCGv_i64 t64 = tcg_temp_new_i64();
5702 for (pass = 0; pass <= q; ++pass) {
5703 uint64_t val = 0;
5704 int n;
5706 for (n = 0; n < 8; n++) {
5707 if (imm & (1 << (n + pass * 8))) {
5708 val |= 0xffull << (n * 8);
5711 tcg_gen_movi_i64(t64, val);
5712 neon_store_reg64(t64, rd + pass);
5714 tcg_temp_free_i64(t64);
5715 } else {
5716 tcg_gen_gvec_dup_imm(MO_32, reg_ofs, vec_size,
5717 vec_size, imm);
5721 } else { /* (insn & 0x00800010 == 0x00800000) */
5722 if (size != 3) {
5723 op = (insn >> 8) & 0xf;
5724 if ((insn & (1 << 6)) == 0) {
5725 /* Three registers of different lengths. */
5726 int src1_wide;
5727 int src2_wide;
5728 int prewiden;
5729 /* undefreq: bit 0 : UNDEF if size == 0
5730 * bit 1 : UNDEF if size == 1
5731 * bit 2 : UNDEF if size == 2
5732 * bit 3 : UNDEF if U == 1
5733 * Note that [2:0] set implies 'always UNDEF'
5735 int undefreq;
5736 /* prewiden, src1_wide, src2_wide, undefreq */
5737 static const int neon_3reg_wide[16][4] = {
5738 {1, 0, 0, 0}, /* VADDL */
5739 {1, 1, 0, 0}, /* VADDW */
5740 {1, 0, 0, 0}, /* VSUBL */
5741 {1, 1, 0, 0}, /* VSUBW */
5742 {0, 1, 1, 0}, /* VADDHN */
5743 {0, 0, 0, 0}, /* VABAL */
5744 {0, 1, 1, 0}, /* VSUBHN */
5745 {0, 0, 0, 0}, /* VABDL */
5746 {0, 0, 0, 0}, /* VMLAL */
5747 {0, 0, 0, 9}, /* VQDMLAL */
5748 {0, 0, 0, 0}, /* VMLSL */
5749 {0, 0, 0, 9}, /* VQDMLSL */
5750 {0, 0, 0, 0}, /* Integer VMULL */
5751 {0, 0, 0, 9}, /* VQDMULL */
5752 {0, 0, 0, 0xa}, /* Polynomial VMULL */
5753 {0, 0, 0, 7}, /* Reserved: always UNDEF */
5756 prewiden = neon_3reg_wide[op][0];
5757 src1_wide = neon_3reg_wide[op][1];
5758 src2_wide = neon_3reg_wide[op][2];
5759 undefreq = neon_3reg_wide[op][3];
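/* e.g. VQDMLAL has undefreq == 9 (bits 0 and 3): it is UNDEF for
 * size == 0 and for U == 1, which is exactly what the check below
 * tests.
 */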
5761 if ((undefreq & (1 << size)) ||
5762 ((undefreq & 8) && u)) {
5763 return 1;
5765 if ((src1_wide && (rn & 1)) ||
5766 (src2_wide && (rm & 1)) ||
5767 (!src2_wide && (rd & 1))) {
5768 return 1;
5771 /* Handle polynomial VMULL in a single pass. */
5772 if (op == 14) {
5773 if (size == 0) {
5774 /* VMULL.P8 */
5775 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5776 0, gen_helper_neon_pmull_h);
5777 } else {
5778 /* VMULL.P64 */
5779 if (!dc_isar_feature(aa32_pmull, s)) {
5780 return 1;
5782 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5783 0, gen_helper_gvec_pmull_q);
5785 return 0;
5788 /* Avoid overlapping operands. Wide source operands are
5789 always aligned so will never overlap with wide
5790 destinations in problematic ways. */
5791 if (rd == rm && !src2_wide) {
5792 tmp = neon_load_reg(rm, 1);
5793 neon_store_scratch(2, tmp);
5794 } else if (rd == rn && !src1_wide) {
5795 tmp = neon_load_reg(rn, 1);
5796 neon_store_scratch(2, tmp);
5798 tmp3 = NULL;
5799 for (pass = 0; pass < 2; pass++) {
5800 if (src1_wide) {
5801 neon_load_reg64(cpu_V0, rn + pass);
5802 tmp = NULL;
5803 } else {
5804 if (pass == 1 && rd == rn) {
5805 tmp = neon_load_scratch(2);
5806 } else {
5807 tmp = neon_load_reg(rn, pass);
5809 if (prewiden) {
5810 gen_neon_widen(cpu_V0, tmp, size, u);
5813 if (src2_wide) {
5814 neon_load_reg64(cpu_V1, rm + pass);
5815 tmp2 = NULL;
5816 } else {
5817 if (pass == 1 && rd == rm) {
5818 tmp2 = neon_load_scratch(2);
5819 } else {
5820 tmp2 = neon_load_reg(rm, pass);
5822 if (prewiden) {
5823 gen_neon_widen(cpu_V1, tmp2, size, u);
5826 switch (op) {
5827 case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
5828 gen_neon_addl(size);
5829 break;
5830 case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
5831 gen_neon_subl(size);
5832 break;
5833 case 5: case 7: /* VABAL, VABDL */
5834 switch ((size << 1) | u) {
5835 case 0:
5836 gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
5837 break;
5838 case 1:
5839 gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
5840 break;
5841 case 2:
5842 gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
5843 break;
5844 case 3:
5845 gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
5846 break;
5847 case 4:
5848 gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
5849 break;
5850 case 5:
5851 gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
5852 break;
5853 default: abort();
5855 tcg_temp_free_i32(tmp2);
5856 tcg_temp_free_i32(tmp);
5857 break;
5858 case 8: case 9: case 10: case 11: case 12: case 13:
5859 /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
5860 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5861 break;
5862 default: /* 15 is RESERVED: caught earlier */
5863 abort();
5865 if (op == 13) {
5866 /* VQDMULL */
5867 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5868 neon_store_reg64(cpu_V0, rd + pass);
5869 } else if (op == 5 || (op >= 8 && op <= 11)) {
5870 /* Accumulate. */
5871 neon_load_reg64(cpu_V1, rd + pass);
5872 switch (op) {
5873 case 10: /* VMLSL */
5874 gen_neon_negl(cpu_V0, size);
5875 /* Fall through */
5876 case 5: case 8: /* VABAL, VMLAL */
5877 gen_neon_addl(size);
5878 break;
5879 case 9: case 11: /* VQDMLAL, VQDMLSL */
5880 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5881 if (op == 11) {
5882 gen_neon_negl(cpu_V0, size);
5884 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5885 break;
5886 default:
5887 abort();
5889 neon_store_reg64(cpu_V0, rd + pass);
5890 } else if (op == 4 || op == 6) {
5891 /* Narrowing operation. */
5892 tmp = tcg_temp_new_i32();
5893 if (!u) {
5894 switch (size) {
5895 case 0:
5896 gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
5897 break;
5898 case 1:
5899 gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
5900 break;
5901 case 2:
5902 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
5903 break;
5904 default: abort();
5906 } else {
5907 switch (size) {
5908 case 0:
5909 gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
5910 break;
5911 case 1:
5912 gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
5913 break;
5914 case 2:
5915 tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
5916 tcg_gen_extrh_i64_i32(tmp, cpu_V0);
5917 break;
5918 default: abort();
5921 if (pass == 0) {
5922 tmp3 = tmp;
5923 } else {
5924 neon_store_reg(rd, 0, tmp3);
5925 neon_store_reg(rd, 1, tmp);
5927 } else {
5928 /* Write back the result. */
5929 neon_store_reg64(cpu_V0, rd + pass);
5932 } else {
5933 /* Two registers and a scalar. NB that for ops of this form
5934 * the ARM ARM labels bit 24 as Q, but it is in our variable
5935 * 'u', not 'q'.
5937 if (size == 0) {
5938 return 1;
5940 switch (op) {
5941 case 1: /* Floating-point VMLA scalar */
5942 case 5: /* Floating-point VMLS scalar */
5943 case 9: /* Floating-point VMUL scalar */
5944 if (size == 1) {
5945 return 1;
5947 /* fall through */
5948 case 0: /* Integer VMLA scalar */
5949 case 4: /* Integer VMLS scalar */
5950 case 8: /* Integer VMUL scalar */
5951 case 12: /* VQDMULH scalar */
5952 case 13: /* VQRDMULH scalar */
5953 if (u && ((rd | rn) & 1)) {
5954 return 1;
5956 tmp = neon_get_scalar(size, rm);
5957 neon_store_scratch(0, tmp);
5958 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5959 tmp = neon_load_scratch(0);
5960 tmp2 = neon_load_reg(rn, pass);
5961 if (op == 12) {
5962 if (size == 1) {
5963 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5964 } else {
5965 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5967 } else if (op == 13) {
5968 if (size == 1) {
5969 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5970 } else {
5971 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5973 } else if (op & 1) {
5974 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5975 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5976 tcg_temp_free_ptr(fpstatus);
5977 } else {
5978 switch (size) {
5979 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5980 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5981 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5982 default: abort();
5985 tcg_temp_free_i32(tmp2);
5986 if (op < 8) {
5987 /* Accumulate. */
5988 tmp2 = neon_load_reg(rd, pass);
5989 switch (op) {
5990 case 0:
5991 gen_neon_add(size, tmp, tmp2);
5992 break;
5993 case 1:
5995 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5996 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5997 tcg_temp_free_ptr(fpstatus);
5998 break;
6000 case 4:
6001 gen_neon_rsb(size, tmp, tmp2);
6002 break;
6003 case 5:
6005 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6006 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
6007 tcg_temp_free_ptr(fpstatus);
6008 break;
6010 default:
6011 abort();
6013 tcg_temp_free_i32(tmp2);
6015 neon_store_reg(rd, pass, tmp);
6017 break;
6018 case 3: /* VQDMLAL scalar */
6019 case 7: /* VQDMLSL scalar */
6020 case 11: /* VQDMULL scalar */
6021 if (u == 1) {
6022 return 1;
6024 /* fall through */
6025 case 2: /* VMLAL scalar */
6026 case 6: /* VMLSL scalar */
6027 case 10: /* VMULL scalar */
6028 if (rd & 1) {
6029 return 1;
6031 tmp2 = neon_get_scalar(size, rm);
6032 /* We need a copy of tmp2 because gen_neon_mull
6033 * frees it during pass 0. */
6034 tmp4 = tcg_temp_new_i32();
6035 tcg_gen_mov_i32(tmp4, tmp2);
6036 tmp3 = neon_load_reg(rn, 1);
6038 for (pass = 0; pass < 2; pass++) {
6039 if (pass == 0) {
6040 tmp = neon_load_reg(rn, 0);
6041 } else {
6042 tmp = tmp3;
6043 tmp2 = tmp4;
6045 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
6046 if (op != 11) {
6047 neon_load_reg64(cpu_V1, rd + pass);
6049 switch (op) {
6050 case 6:
6051 gen_neon_negl(cpu_V0, size);
6052 /* Fall through */
6053 case 2:
6054 gen_neon_addl(size);
6055 break;
6056 case 3: case 7:
6057 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6058 if (op == 7) {
6059 gen_neon_negl(cpu_V0, size);
6061 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
6062 break;
6063 case 10:
6064 /* no-op */
6065 break;
6066 case 11:
6067 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
6068 break;
6069 default:
6070 abort();
6072 neon_store_reg64(cpu_V0, rd + pass);
6074 break;
6075 case 14: /* VQRDMLAH scalar */
6076 case 15: /* VQRDMLSH scalar */
6078 NeonGenThreeOpEnvFn *fn;
6080 if (!dc_isar_feature(aa32_rdm, s)) {
6081 return 1;
6083 if (u && ((rd | rn) & 1)) {
6084 return 1;
6086 if (op == 14) {
6087 if (size == 1) {
6088 fn = gen_helper_neon_qrdmlah_s16;
6089 } else {
6090 fn = gen_helper_neon_qrdmlah_s32;
6092 } else {
6093 if (size == 1) {
6094 fn = gen_helper_neon_qrdmlsh_s16;
6095 } else {
6096 fn = gen_helper_neon_qrdmlsh_s32;
6100 tmp2 = neon_get_scalar(size, rm);
6101 for (pass = 0; pass < (u ? 4 : 2); pass++) {
6102 tmp = neon_load_reg(rn, pass);
6103 tmp3 = neon_load_reg(rd, pass);
6104 fn(tmp, cpu_env, tmp, tmp2, tmp3);
6105 tcg_temp_free_i32(tmp3);
6106 neon_store_reg(rd, pass, tmp);
6108 tcg_temp_free_i32(tmp2);
6110 break;
6111 default:
6112 g_assert_not_reached();
6115 } else { /* size == 3 */
6116 if (!u) {
6117 /* Extract. */
6118 imm = (insn >> 8) & 0xf;
6120 if (imm > 7 && !q)
6121 return 1;
6123 if (q && ((rd | rn | rm) & 1)) {
6124 return 1;
6127 if (imm == 0) {
6128 neon_load_reg64(cpu_V0, rn);
6129 if (q) {
6130 neon_load_reg64(cpu_V1, rn + 1);
6132 } else if (imm == 8) {
6133 neon_load_reg64(cpu_V0, rn + 1);
6134 if (q) {
6135 neon_load_reg64(cpu_V1, rm);
6137 } else if (q) {
6138 tmp64 = tcg_temp_new_i64();
6139 if (imm < 8) {
6140 neon_load_reg64(cpu_V0, rn);
6141 neon_load_reg64(tmp64, rn + 1);
6142 } else {
6143 neon_load_reg64(cpu_V0, rn + 1);
6144 neon_load_reg64(tmp64, rm);
6146 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
6147 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
6148 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6149 if (imm < 8) {
6150 neon_load_reg64(cpu_V1, rm);
6151 } else {
6152 neon_load_reg64(cpu_V1, rm + 1);
6153 imm -= 8;
6155 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6156 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
6157 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6158 tcg_temp_free_i64(tmp64);
6159 } else {
6160 /* BUGFIX */
6161 neon_load_reg64(cpu_V0, rn);
6162 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
6163 neon_load_reg64(cpu_V1, rm);
6164 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
6165 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6167 neon_store_reg64(cpu_V0, rd);
6168 if (q) {
6169 neon_store_reg64(cpu_V1, rd + 1);
6171 } else if ((insn & (1 << 11)) == 0) {
6172 /* Two register misc. */
6173 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
6174 size = (insn >> 18) & 3;
6175 /* UNDEF for unknown op values and bad op-size combinations */
6176 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
6177 return 1;
6179 if (neon_2rm_is_v8_op(op) &&
6180 !arm_dc_feature(s, ARM_FEATURE_V8)) {
6181 return 1;
6183 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6184 q && ((rm | rd) & 1)) {
6185 return 1;
6187 switch (op) {
6188 case NEON_2RM_VREV64:
6189 for (pass = 0; pass < (q ? 2 : 1); pass++) {
6190 tmp = neon_load_reg(rm, pass * 2);
6191 tmp2 = neon_load_reg(rm, pass * 2 + 1);
6192 switch (size) {
6193 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6194 case 1: gen_swap_half(tmp); break;
6195 case 2: /* no-op */ break;
6196 default: abort();
6198 neon_store_reg(rd, pass * 2 + 1, tmp);
6199 if (size == 2) {
6200 neon_store_reg(rd, pass * 2, tmp2);
6201 } else {
6202 switch (size) {
6203 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6204 case 1: gen_swap_half(tmp2); break;
6205 default: abort();
6207 neon_store_reg(rd, pass * 2, tmp2);
6210 break;
6211 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6212 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6213 for (pass = 0; pass < q + 1; pass++) {
6214 tmp = neon_load_reg(rm, pass * 2);
6215 gen_neon_widen(cpu_V0, tmp, size, op & 1);
6216 tmp = neon_load_reg(rm, pass * 2 + 1);
6217 gen_neon_widen(cpu_V1, tmp, size, op & 1);
6218 switch (size) {
6219 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
6220 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
6221 case 2: tcg_gen_add_i64(CPU_V001); break;
6222 default: abort();
6224 if (op >= NEON_2RM_VPADAL) {
6225 /* Accumulate. */
6226 neon_load_reg64(cpu_V1, rd + pass);
6227 gen_neon_addl(size);
6229 neon_store_reg64(cpu_V0, rd + pass);
6231 break;
6232 case NEON_2RM_VTRN:
6233 if (size == 2) {
6234 int n;
6235 for (n = 0; n < (q ? 4 : 2); n += 2) {
6236 tmp = neon_load_reg(rm, n);
6237 tmp2 = neon_load_reg(rd, n + 1);
6238 neon_store_reg(rm, n, tmp2);
6239 neon_store_reg(rd, n + 1, tmp);
6241 } else {
6242 goto elementwise;
6244 break;
6245 case NEON_2RM_VUZP:
6246 if (gen_neon_unzip(rd, rm, size, q)) {
6247 return 1;
6249 break;
6250 case NEON_2RM_VZIP:
6251 if (gen_neon_zip(rd, rm, size, q)) {
6252 return 1;
6254 break;
6255 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6256 /* also VQMOVUN; op field and mnemonics don't line up */
6257 if (rm & 1) {
6258 return 1;
6260 tmp2 = NULL;
6261 for (pass = 0; pass < 2; pass++) {
6262 neon_load_reg64(cpu_V0, rm + pass);
6263 tmp = tcg_temp_new_i32();
6264 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6265 tmp, cpu_V0);
6266 if (pass == 0) {
6267 tmp2 = tmp;
6268 } else {
6269 neon_store_reg(rd, 0, tmp2);
6270 neon_store_reg(rd, 1, tmp);
6273 break;
6274 case NEON_2RM_VSHLL:
6275 if (q || (rd & 1)) {
6276 return 1;
6278 tmp = neon_load_reg(rm, 0);
6279 tmp2 = neon_load_reg(rm, 1);
6280 for (pass = 0; pass < 2; pass++) {
6281 if (pass == 1)
6282 tmp = tmp2;
6283 gen_neon_widen(cpu_V0, tmp, size, 1);
6284 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
6285 neon_store_reg64(cpu_V0, rd + pass);
6287 break;
6288 case NEON_2RM_VCVT_F16_F32:
6290 TCGv_ptr fpst;
6291 TCGv_i32 ahp;
6293 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6294 q || (rm & 1)) {
6295 return 1;
6297 fpst = get_fpstatus_ptr(true);
6298 ahp = get_ahp_flag();
6299 tmp = neon_load_reg(rm, 0);
6300 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6301 tmp2 = neon_load_reg(rm, 1);
6302 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6303 tcg_gen_shli_i32(tmp2, tmp2, 16);
6304 tcg_gen_or_i32(tmp2, tmp2, tmp);
6305 tcg_temp_free_i32(tmp);
6306 tmp = neon_load_reg(rm, 2);
6307 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6308 tmp3 = neon_load_reg(rm, 3);
6309 neon_store_reg(rd, 0, tmp2);
6310 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6311 tcg_gen_shli_i32(tmp3, tmp3, 16);
6312 tcg_gen_or_i32(tmp3, tmp3, tmp);
6313 neon_store_reg(rd, 1, tmp3);
6314 tcg_temp_free_i32(tmp);
6315 tcg_temp_free_i32(ahp);
6316 tcg_temp_free_ptr(fpst);
6317 break;
6319 case NEON_2RM_VCVT_F32_F16:
6321 TCGv_ptr fpst;
6322 TCGv_i32 ahp;
6323 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
6324 q || (rd & 1)) {
6325 return 1;
6327 fpst = get_fpstatus_ptr(true);
6328 ahp = get_ahp_flag();
6329 tmp3 = tcg_temp_new_i32();
6330 tmp = neon_load_reg(rm, 0);
6331 tmp2 = neon_load_reg(rm, 1);
6332 tcg_gen_ext16u_i32(tmp3, tmp);
6333 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6334 neon_store_reg(rd, 0, tmp3);
6335 tcg_gen_shri_i32(tmp, tmp, 16);
6336 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6337 neon_store_reg(rd, 1, tmp);
6338 tmp3 = tcg_temp_new_i32();
6339 tcg_gen_ext16u_i32(tmp3, tmp2);
6340 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6341 neon_store_reg(rd, 2, tmp3);
6342 tcg_gen_shri_i32(tmp2, tmp2, 16);
6343 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6344 neon_store_reg(rd, 3, tmp2);
6345 tcg_temp_free_i32(ahp);
6346 tcg_temp_free_ptr(fpst);
6347 break;
6349 case NEON_2RM_AESE: case NEON_2RM_AESMC:
6350 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
6351 return 1;
6353 ptr1 = vfp_reg_ptr(true, rd);
6354 ptr2 = vfp_reg_ptr(true, rm);
6356 /* Bit 6 is the lowest opcode bit; it distinguishes between
6357 * encryption (AESE/AESMC) and decryption (AESD/AESIMC) */
6359 tmp3 = tcg_const_i32(extract32(insn, 6, 1));
6361 if (op == NEON_2RM_AESE) {
6362 gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6363 } else {
6364 gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6366 tcg_temp_free_ptr(ptr1);
6367 tcg_temp_free_ptr(ptr2);
6368 tcg_temp_free_i32(tmp3);
6369 break;
6370 case NEON_2RM_SHA1H:
6371 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
6372 return 1;
6374 ptr1 = vfp_reg_ptr(true, rd);
6375 ptr2 = vfp_reg_ptr(true, rm);
6377 gen_helper_crypto_sha1h(ptr1, ptr2);
6379 tcg_temp_free_ptr(ptr1);
6380 tcg_temp_free_ptr(ptr2);
6381 break;
6382 case NEON_2RM_SHA1SU1:
6383 if ((rm | rd) & 1) {
6384 return 1;
6386 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
6387 if (q) {
6388 if (!dc_isar_feature(aa32_sha2, s)) {
6389 return 1;
6391 } else if (!dc_isar_feature(aa32_sha1, s)) {
6392 return 1;
6394 ptr1 = vfp_reg_ptr(true, rd);
6395 ptr2 = vfp_reg_ptr(true, rm);
6396 if (q) {
6397 gen_helper_crypto_sha256su0(ptr1, ptr2);
6398 } else {
6399 gen_helper_crypto_sha1su1(ptr1, ptr2);
6401 tcg_temp_free_ptr(ptr1);
6402 tcg_temp_free_ptr(ptr2);
6403 break;
6405 case NEON_2RM_VMVN:
6406 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
6407 break;
6408 case NEON_2RM_VNEG:
6409 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6410 break;
6411 case NEON_2RM_VABS:
6412 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6413 break;
6415 case NEON_2RM_VCEQ0:
6416 gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6417 break;
6418 case NEON_2RM_VCGT0:
6419 gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6420 break;
6421 case NEON_2RM_VCLE0:
6422 gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6423 break;
6424 case NEON_2RM_VCGE0:
6425 gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6426 break;
6427 case NEON_2RM_VCLT0:
6428 gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
6429 break;
6431 default:
6432 elementwise:
6433 for (pass = 0; pass < (q ? 4 : 2); pass++) {
6434 tmp = neon_load_reg(rm, pass);
6435 switch (op) {
6436 case NEON_2RM_VREV32:
6437 switch (size) {
6438 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
6439 case 1: gen_swap_half(tmp); break;
6440 default: abort();
6442 break;
6443 case NEON_2RM_VREV16:
6444 gen_rev16(tmp, tmp);
6445 break;
6446 case NEON_2RM_VCLS:
6447 switch (size) {
6448 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
6449 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
6450 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
6451 default: abort();
6453 break;
6454 case NEON_2RM_VCLZ:
6455 switch (size) {
6456 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
6457 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
6458 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
6459 default: abort();
6461 break;
6462 case NEON_2RM_VCNT:
6463 gen_helper_neon_cnt_u8(tmp, tmp);
6464 break;
6465 case NEON_2RM_VQABS:
6466 switch (size) {
6467 case 0:
6468 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6469 break;
6470 case 1:
6471 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6472 break;
6473 case 2:
6474 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6475 break;
6476 default: abort();
6478 break;
6479 case NEON_2RM_VQNEG:
6480 switch (size) {
6481 case 0:
6482 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6483 break;
6484 case 1:
6485 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6486 break;
6487 case 2:
6488 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6489 break;
6490 default: abort();
6492 break;
6493 case NEON_2RM_VCGT0_F:
6495 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6496 tmp2 = tcg_const_i32(0);
6497 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6498 tcg_temp_free_i32(tmp2);
6499 tcg_temp_free_ptr(fpstatus);
6500 break;
6502 case NEON_2RM_VCGE0_F:
6504 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6505 tmp2 = tcg_const_i32(0);
6506 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6507 tcg_temp_free_i32(tmp2);
6508 tcg_temp_free_ptr(fpstatus);
6509 break;
6511 case NEON_2RM_VCEQ0_F:
6513 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6514 tmp2 = tcg_const_i32(0);
6515 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6516 tcg_temp_free_i32(tmp2);
6517 tcg_temp_free_ptr(fpstatus);
6518 break;
6520 case NEON_2RM_VCLE0_F:
6522 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6523 tmp2 = tcg_const_i32(0);
6524 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6525 tcg_temp_free_i32(tmp2);
6526 tcg_temp_free_ptr(fpstatus);
6527 break;
6529 case NEON_2RM_VCLT0_F:
6531 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6532 tmp2 = tcg_const_i32(0);
6533 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6534 tcg_temp_free_i32(tmp2);
6535 tcg_temp_free_ptr(fpstatus);
6536 break;
6538 case NEON_2RM_VABS_F:
6539 gen_helper_vfp_abss(tmp, tmp);
6540 break;
6541 case NEON_2RM_VNEG_F:
6542 gen_helper_vfp_negs(tmp, tmp);
6543 break;
6544 case NEON_2RM_VSWP:
6545 tmp2 = neon_load_reg(rd, pass);
6546 neon_store_reg(rm, pass, tmp2);
6547 break;
6548 case NEON_2RM_VTRN:
6549 tmp2 = neon_load_reg(rd, pass);
6550 switch (size) {
6551 case 0: gen_neon_trn_u8(tmp, tmp2); break;
6552 case 1: gen_neon_trn_u16(tmp, tmp2); break;
6553 default: abort();
6555 neon_store_reg(rm, pass, tmp2);
6556 break;
6557 case NEON_2RM_VRINTN:
6558 case NEON_2RM_VRINTA:
6559 case NEON_2RM_VRINTM:
6560 case NEON_2RM_VRINTP:
6561 case NEON_2RM_VRINTZ:
6563 TCGv_i32 tcg_rmode;
6564 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6565 int rmode;
6567 if (op == NEON_2RM_VRINTZ) {
6568 rmode = FPROUNDING_ZERO;
6569 } else {
6570 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
6573 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6574 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6575 cpu_env);
6576 gen_helper_rints(tmp, tmp, fpstatus);
6577 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6578 cpu_env);
6579 tcg_temp_free_ptr(fpstatus);
6580 tcg_temp_free_i32(tcg_rmode);
6581 break;
6583 case NEON_2RM_VRINTX:
6585 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6586 gen_helper_rints_exact(tmp, tmp, fpstatus);
6587 tcg_temp_free_ptr(fpstatus);
6588 break;
6590 case NEON_2RM_VCVTAU:
6591 case NEON_2RM_VCVTAS:
6592 case NEON_2RM_VCVTNU:
6593 case NEON_2RM_VCVTNS:
6594 case NEON_2RM_VCVTPU:
6595 case NEON_2RM_VCVTPS:
6596 case NEON_2RM_VCVTMU:
6597 case NEON_2RM_VCVTMS:
6599 bool is_signed = !extract32(insn, 7, 1);
6600 TCGv_ptr fpst = get_fpstatus_ptr(1);
6601 TCGv_i32 tcg_rmode, tcg_shift;
6602 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
6604 tcg_shift = tcg_const_i32(0);
6605 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6606 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6607 cpu_env);
6609 if (is_signed) {
6610 gen_helper_vfp_tosls(tmp, tmp,
6611 tcg_shift, fpst);
6612 } else {
6613 gen_helper_vfp_touls(tmp, tmp,
6614 tcg_shift, fpst);
6617 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6618 cpu_env);
6619 tcg_temp_free_i32(tcg_rmode);
6620 tcg_temp_free_i32(tcg_shift);
6621 tcg_temp_free_ptr(fpst);
6622 break;
6624 case NEON_2RM_VRECPE:
6625 gen_helper_recpe_u32(tmp, tmp);
6626 break;
6627 case NEON_2RM_VRSQRTE:
6628 gen_helper_rsqrte_u32(tmp, tmp);
6629 break;
6630 case NEON_2RM_VRECPE_F:
6632 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6633 gen_helper_recpe_f32(tmp, tmp, fpstatus);
6634 tcg_temp_free_ptr(fpstatus);
6635 break;
6637 case NEON_2RM_VRSQRTE_F:
6639 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6640 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6641 tcg_temp_free_ptr(fpstatus);
6642 break;
6644 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6646 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6647 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6648 tcg_temp_free_ptr(fpstatus);
6649 break;
6651 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6653 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6654 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6655 tcg_temp_free_ptr(fpstatus);
6656 break;
6658 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6660 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6661 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6662 tcg_temp_free_ptr(fpstatus);
6663 break;
6665 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6667 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6668 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6669 tcg_temp_free_ptr(fpstatus);
6670 break;
6672 default:
6673 /* Reserved op values were caught by the
6674 * neon_2rm_sizes[] check earlier. */
6676 abort();
6678 neon_store_reg(rd, pass, tmp);
6680 break;
6682 } else if ((insn & (1 << 10)) == 0) {
6683 /* VTBL, VTBX. */
6684 int n = ((insn >> 8) & 3) + 1;
6685 if ((rn + n) > 32) {
6686 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6687 * helper function running off the end of the register file. */
6689 return 1;
6691 n <<= 3;
6692 if (insn & (1 << 6)) {
6693 tmp = neon_load_reg(rd, 0);
6694 } else {
6695 tmp = tcg_temp_new_i32();
6696 tcg_gen_movi_i32(tmp, 0);
6698 tmp2 = neon_load_reg(rm, 0);
6699 ptr1 = vfp_reg_ptr(true, rn);
6700 tmp5 = tcg_const_i32(n);
6701 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6702 tcg_temp_free_i32(tmp);
6703 if (insn & (1 << 6)) {
6704 tmp = neon_load_reg(rd, 1);
6705 } else {
6706 tmp = tcg_temp_new_i32();
6707 tcg_gen_movi_i32(tmp, 0);
6709 tmp3 = neon_load_reg(rm, 1);
6710 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6711 tcg_temp_free_i32(tmp5);
6712 tcg_temp_free_ptr(ptr1);
6713 neon_store_reg(rd, 0, tmp2);
6714 neon_store_reg(rd, 1, tmp3);
6715 tcg_temp_free_i32(tmp);
6716 } else if ((insn & 0x380) == 0) {
6717 /* VDUP */
6718 int element;
6719 MemOp size;
6721 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6722 return 1;
6724 if (insn & (1 << 16)) {
6725 size = MO_8;
6726 element = (insn >> 17) & 7;
6727 } else if (insn & (1 << 17)) {
6728 size = MO_16;
6729 element = (insn >> 18) & 3;
6730 } else {
6731 size = MO_32;
6732 element = (insn >> 19) & 1;
6734 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6735 neon_element_offset(rm, element, size),
6736 q ? 16 : 8, q ? 16 : 8);
6737 } else {
6738 return 1;
6742 return 0;
6745 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6747 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6748 const ARMCPRegInfo *ri;
6750 cpnum = (insn >> 8) & 0xf;
6752 /* First check for coprocessor space used for XScale/iwMMXt insns */
6753 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
6754 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
6755 return 1;
6757 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6758 return disas_iwmmxt_insn(s, insn);
6759 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6760 return disas_dsp_insn(s, insn);
6762 return 1;
6765 /* Otherwise treat as a generic register access */
6766 is64 = (insn & (1 << 25)) == 0;
6767 if (!is64 && ((insn & (1 << 4)) == 0)) {
6768 /* cdp */
6769 return 1;
6772 crm = insn & 0xf;
6773 if (is64) {
6774 crn = 0;
6775 opc1 = (insn >> 4) & 0xf;
6776 opc2 = 0;
6777 rt2 = (insn >> 16) & 0xf;
6778 } else {
6779 crn = (insn >> 16) & 0xf;
6780 opc1 = (insn >> 21) & 7;
6781 opc2 = (insn >> 5) & 7;
6782 rt2 = 0;
6784 isread = (insn >> 20) & 1;
6785 rt = (insn >> 12) & 0xf;
6787 ri = get_arm_cp_reginfo(s->cp_regs,
6788 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
6789 if (ri) {
6790 bool need_exit_tb;
6792 /* Check access permissions */
6793 if (!cp_access_ok(s->current_el, ri, isread)) {
6794 return 1;
6797 if (s->hstr_active || ri->accessfn ||
6798 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
6799 /* Emit code to perform further access permissions checks at
6800 * runtime; this may result in an exception.
6801 * Note that on XScale all cp0..c13 registers do an access check
6802 * call in order to handle c15_cpar. */
6804 TCGv_ptr tmpptr;
6805 TCGv_i32 tcg_syn, tcg_isread;
6806 uint32_t syndrome;
6808 /* Note that since we are an implementation which takes an
6809 * exception on a trapped conditional instruction only if the
6810 * instruction passes its condition code check, we can take
6811 * advantage of the clause in the ARM ARM that allows us to set
6812 * the COND field in the instruction to 0xE in all cases.
6813 * We could fish the actual condition out of the insn (ARM)
6814 * or the condexec bits (Thumb) but it isn't necessary. */
6816 switch (cpnum) {
6817 case 14:
6818 if (is64) {
6819 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6820 isread, false);
6821 } else {
6822 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6823 rt, isread, false);
6825 break;
6826 case 15:
6827 if (is64) {
6828 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6829 isread, false);
6830 } else {
6831 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6832 rt, isread, false);
6834 break;
6835 default:
6836 /* ARMv8 defines that only coprocessors 14 and 15 exist,
6837 * so this can only happen if this is an ARMv7 or earlier CPU,
6838 * in which case the syndrome information won't actually be
6839 * guest visible. */
6841 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
6842 syndrome = syn_uncategorized();
6843 break;
6846 gen_set_condexec(s);
6847 gen_set_pc_im(s, s->pc_curr);
6848 tmpptr = tcg_const_ptr(ri);
6849 tcg_syn = tcg_const_i32(syndrome);
6850 tcg_isread = tcg_const_i32(isread);
6851 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
6852 tcg_isread);
6853 tcg_temp_free_ptr(tmpptr);
6854 tcg_temp_free_i32(tcg_syn);
6855 tcg_temp_free_i32(tcg_isread);
6856 } else if (ri->type & ARM_CP_RAISES_EXC) {
6858 /* The readfn or writefn might raise an exception;
6859 * synchronize the CPU state in case it does. */
6861 gen_set_condexec(s);
6862 gen_set_pc_im(s, s->pc_curr);
6865 /* Handle special cases first */
6866 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
6867 case ARM_CP_NOP:
6868 return 0;
6869 case ARM_CP_WFI:
6870 if (isread) {
6871 return 1;
6873 gen_set_pc_im(s, s->base.pc_next);
6874 s->base.is_jmp = DISAS_WFI;
6875 return 0;
6876 default:
6877 break;
6880 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
6881 gen_io_start();
6884 if (isread) {
6885 /* Read */
6886 if (is64) {
6887 TCGv_i64 tmp64;
6888 TCGv_i32 tmp;
6889 if (ri->type & ARM_CP_CONST) {
6890 tmp64 = tcg_const_i64(ri->resetvalue);
6891 } else if (ri->readfn) {
6892 TCGv_ptr tmpptr;
6893 tmp64 = tcg_temp_new_i64();
6894 tmpptr = tcg_const_ptr(ri);
6895 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
6896 tcg_temp_free_ptr(tmpptr);
6897 } else {
6898 tmp64 = tcg_temp_new_i64();
6899 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
6901 tmp = tcg_temp_new_i32();
6902 tcg_gen_extrl_i64_i32(tmp, tmp64);
6903 store_reg(s, rt, tmp);
6904 tmp = tcg_temp_new_i32();
6905 tcg_gen_extrh_i64_i32(tmp, tmp64);
6906 tcg_temp_free_i64(tmp64);
6907 store_reg(s, rt2, tmp);
6908 } else {
6909 TCGv_i32 tmp;
6910 if (ri->type & ARM_CP_CONST) {
6911 tmp = tcg_const_i32(ri->resetvalue);
6912 } else if (ri->readfn) {
6913 TCGv_ptr tmpptr;
6914 tmp = tcg_temp_new_i32();
6915 tmpptr = tcg_const_ptr(ri);
6916 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
6917 tcg_temp_free_ptr(tmpptr);
6918 } else {
6919 tmp = load_cpu_offset(ri->fieldoffset);
6921 if (rt == 15) {
6922 /* Destination register of r15 for 32 bit loads sets
6923 * the condition codes from the high 4 bits of the value */
6925 gen_set_nzcv(tmp);
6926 tcg_temp_free_i32(tmp);
6927 } else {
6928 store_reg(s, rt, tmp);
6931 } else {
6932 /* Write */
6933 if (ri->type & ARM_CP_CONST) {
6934 /* If not forbidden by access permissions, treat as WI */
6935 return 0;
6938 if (is64) {
6939 TCGv_i32 tmplo, tmphi;
6940 TCGv_i64 tmp64 = tcg_temp_new_i64();
6941 tmplo = load_reg(s, rt);
6942 tmphi = load_reg(s, rt2);
6943 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
6944 tcg_temp_free_i32(tmplo);
6945 tcg_temp_free_i32(tmphi);
6946 if (ri->writefn) {
6947 TCGv_ptr tmpptr = tcg_const_ptr(ri);
6948 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
6949 tcg_temp_free_ptr(tmpptr);
6950 } else {
6951 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
6953 tcg_temp_free_i64(tmp64);
6954 } else {
6955 if (ri->writefn) {
6956 TCGv_i32 tmp;
6957 TCGv_ptr tmpptr;
6958 tmp = load_reg(s, rt);
6959 tmpptr = tcg_const_ptr(ri);
6960 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
6961 tcg_temp_free_ptr(tmpptr);
6962 tcg_temp_free_i32(tmp);
6963 } else {
6964 TCGv_i32 tmp = load_reg(s, rt);
6965 store_cpu_offset(tmp, ri->fieldoffset);
6970 /* I/O operations must end the TB here (whether read or write) */
6971 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
6972 (ri->type & ARM_CP_IO));
6974 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
6976 /* A write to any coprocessor register that ends a TB
6977 * must rebuild the hflags for the next TB. */
6979 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
6980 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6981 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
6982 } else {
6983 if (ri->type & ARM_CP_NEWEL) {
6984 gen_helper_rebuild_hflags_a32_newel(cpu_env);
6985 } else {
6986 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
6989 tcg_temp_free_i32(tcg_el);
6991 /* We default to ending the TB on a coprocessor register write,
6992 * but allow this to be suppressed by the register definition
6993 * (usually only necessary to work around guest bugs). */
6995 need_exit_tb = true;
6997 if (need_exit_tb) {
6998 gen_lookup_tb(s);
7001 return 0;
7004 /* Unknown register; this might be a guest error or a QEMU
7005 * unimplemented feature. */
7007 if (is64) {
7008 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7009 "64 bit system register cp:%d opc1: %d crm:%d "
7010 "(%s)\n",
7011 isread ? "read" : "write", cpnum, opc1, crm,
7012 s->ns ? "non-secure" : "secure");
7013 } else {
7014 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7015 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7016 "(%s)\n",
7017 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7018 s->ns ? "non-secure" : "secure");
7021 return 1;
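/*
 * Note on the return convention used above: disas_coproc_insn() returns 0
 * once the access has been handled (including the ARM_CP_NOP and
 * treat-as-write-ignored cases) and 1 to ask the caller to treat the
 * instruction as UNDEF, matching the other disas_* helpers in this file.
 */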
7025 /* Store a 64-bit value to a register pair. Clobbers val. */
7026 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
7028 TCGv_i32 tmp;
7029 tmp = tcg_temp_new_i32();
7030 tcg_gen_extrl_i64_i32(tmp, val);
7031 store_reg(s, rlow, tmp);
7032 tmp = tcg_temp_new_i32();
7033 tcg_gen_extrh_i64_i32(tmp, val);
7034 store_reg(s, rhigh, tmp);
7037 /* load and add a 64-bit value from a register pair. */
7038 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
7040 TCGv_i64 tmp;
7041 TCGv_i32 tmpl;
7042 TCGv_i32 tmph;
7044 /* Load 64-bit value rd:rn. */
7045 tmpl = load_reg(s, rlow);
7046 tmph = load_reg(s, rhigh);
7047 tmp = tcg_temp_new_i64();
7048 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7049 tcg_temp_free_i32(tmpl);
7050 tcg_temp_free_i32(tmph);
7051 tcg_gen_add_i64(val, val, tmp);
7052 tcg_temp_free_i64(tmp);
7055 /* Set N and Z flags from hi|lo. */
7056 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7058 tcg_gen_mov_i32(cpu_NF, hi);
7059 tcg_gen_or_i32(cpu_ZF, lo, hi);
7062 /* Load/Store exclusive instructions are implemented by remembering
7063 the value/address loaded, and seeing if these are the same
7064 when the store is performed. This should be sufficient to implement
7065 the architecturally mandated semantics, and avoids having to monitor
7066 regular stores. The compare vs the remembered value is done during
7067 the cmpxchg operation, but we must compare the addresses manually. */
7068 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
7069 TCGv_i32 addr, int size)
7071 TCGv_i32 tmp = tcg_temp_new_i32();
7072 MemOp opc = size | MO_ALIGN | s->be_data;
7074 s->is_ldex = true;
7076 if (size == 3) {
7077 TCGv_i32 tmp2 = tcg_temp_new_i32();
7078 TCGv_i64 t64 = tcg_temp_new_i64();
7080 /* For AArch32, architecturally the 32-bit word at the lowest
7081 * address is always Rt and the one at addr+4 is Rt2, even if
7082 * the CPU is big-endian. That means we don't want to do a
7083 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7084 * for an architecturally 64-bit access, but instead do a
7085 * 64-bit access using MO_BE if appropriate and then split
7086 * the two halves.
7087 * This only makes a difference for BE32 user-mode, where
7088 * frob64() must not flip the two halves of the 64-bit data
7089 * but this code must treat BE32 user-mode like BE32 system. */
7091 TCGv taddr = gen_aa32_addr(s, addr, opc);
7093 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7094 tcg_temp_free(taddr);
7095 tcg_gen_mov_i64(cpu_exclusive_val, t64);
7096 if (s->be_data == MO_BE) {
7097 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7098 } else {
7099 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7101 tcg_temp_free_i64(t64);
7103 store_reg(s, rt2, tmp2);
7104 } else {
7105 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7106 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7109 store_reg(s, rt, tmp);
7110 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7113 static void gen_clrex(DisasContext *s)
7115 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7118 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
7119 TCGv_i32 addr, int size)
7121 TCGv_i32 t0, t1, t2;
7122 TCGv_i64 extaddr;
7123 TCGv taddr;
7124 TCGLabel *done_label;
7125 TCGLabel *fail_label;
7126 MemOp opc = size | MO_ALIGN | s->be_data;
7128 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
7129 [addr] = {Rt};
7130 {Rd} = 0;
7131 } else {
7132 {Rd} = 1;
7133 } */
7134 fail_label = gen_new_label();
7135 done_label = gen_new_label();
7136 extaddr = tcg_temp_new_i64();
7137 tcg_gen_extu_i32_i64(extaddr, addr);
7138 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7139 tcg_temp_free_i64(extaddr);
7141 taddr = gen_aa32_addr(s, addr, opc);
7142 t0 = tcg_temp_new_i32();
7143 t1 = load_reg(s, rt);
7144 if (size == 3) {
7145 TCGv_i64 o64 = tcg_temp_new_i64();
7146 TCGv_i64 n64 = tcg_temp_new_i64();
7148 t2 = load_reg(s, rt2);
7149 /* For AArch32, architecturally the 32-bit word at the lowest
7150 * address is always Rt and the one at addr+4 is Rt2, even if
7151 * the CPU is big-endian. Since we're going to treat this as a
7152 * single 64-bit BE store, we need to put the two halves in the
7153 * opposite order for BE to LE, so that they end up in the right
7154 * places.
7155 * We don't want gen_aa32_frob64() because that does the wrong
7156 * thing for BE32 usermode. */
7158 if (s->be_data == MO_BE) {
7159 tcg_gen_concat_i32_i64(n64, t2, t1);
7160 } else {
7161 tcg_gen_concat_i32_i64(n64, t1, t2);
7163 tcg_temp_free_i32(t2);
7165 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7166 get_mem_index(s), opc);
7167 tcg_temp_free_i64(n64);
7169 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7170 tcg_gen_extrl_i64_i32(t0, o64);
7172 tcg_temp_free_i64(o64);
7173 } else {
7174 t2 = tcg_temp_new_i32();
7175 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7176 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7177 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7178 tcg_temp_free_i32(t2);
7180 tcg_temp_free_i32(t1);
7181 tcg_temp_free(taddr);
7182 tcg_gen_mov_i32(cpu_R[rd], t0);
7183 tcg_temp_free_i32(t0);
7184 tcg_gen_br(done_label);
7186 gen_set_label(fail_label);
7187 tcg_gen_movi_i32(cpu_R[rd], 1);
7188 gen_set_label(done_label);
7189 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
7192 /* gen_srs:
7193 * @env: CPUARMState
7194 * @s: DisasContext
7195 * @mode: mode field from insn (which stack to store to)
7196 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7197 * @writeback: true if writeback bit set
7199 * Generate code for the SRS (Store Return State) insn. */
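/*
 * Implementation note (derived from the code below): the first amode
 * switch picks the offset at which LR is stored (SPSR then goes at that
 * address + 4); the second switch computes the writeback adjustment for
 * the banked SP, which works out to SP - 8 for the decrementing modes
 * (DA/DB) and SP + 8 for the incrementing modes (IA/IB).
 */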
7201 static void gen_srs(DisasContext *s,
7202 uint32_t mode, uint32_t amode, bool writeback)
7204 int32_t offset;
7205 TCGv_i32 addr, tmp;
7206 bool undef = false;
7208 /* SRS is:
7209 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7210 * and specified mode is monitor mode
7211 * - UNDEFINED in Hyp mode
7212 * - UNPREDICTABLE in User or System mode
7213 * - UNPREDICTABLE if the specified mode is:
7214 * -- not implemented
7215 * -- not a valid mode number
7216 * -- a mode that's at a higher exception level
7217 * -- Monitor, if we are Non-secure
7218 * For the UNPREDICTABLE cases we choose to UNDEF. */
7220 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
7221 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
7222 return;
7225 if (s->current_el == 0 || s->current_el == 2) {
7226 undef = true;
7229 switch (mode) {
7230 case ARM_CPU_MODE_USR:
7231 case ARM_CPU_MODE_FIQ:
7232 case ARM_CPU_MODE_IRQ:
7233 case ARM_CPU_MODE_SVC:
7234 case ARM_CPU_MODE_ABT:
7235 case ARM_CPU_MODE_UND:
7236 case ARM_CPU_MODE_SYS:
7237 break;
7238 case ARM_CPU_MODE_HYP:
7239 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7240 undef = true;
7242 break;
7243 case ARM_CPU_MODE_MON:
7244 /* No need to check specifically for "are we non-secure" because
7245 * we've already made EL0 UNDEF and handled the trap for S-EL1;
7246 * so if this isn't EL3 then we must be non-secure. */
7248 if (s->current_el != 3) {
7249 undef = true;
7251 break;
7252 default:
7253 undef = true;
7256 if (undef) {
7257 unallocated_encoding(s);
7258 return;
7261 addr = tcg_temp_new_i32();
7262 tmp = tcg_const_i32(mode);
7263 /* get_r13_banked() will raise an exception if called from System mode */
7264 gen_set_condexec(s);
7265 gen_set_pc_im(s, s->pc_curr);
7266 gen_helper_get_r13_banked(addr, cpu_env, tmp);
7267 tcg_temp_free_i32(tmp);
7268 switch (amode) {
7269 case 0: /* DA */
7270 offset = -4;
7271 break;
7272 case 1: /* IA */
7273 offset = 0;
7274 break;
7275 case 2: /* DB */
7276 offset = -8;
7277 break;
7278 case 3: /* IB */
7279 offset = 4;
7280 break;
7281 default:
7282 abort();
7284 tcg_gen_addi_i32(addr, addr, offset);
7285 tmp = load_reg(s, 14);
7286 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7287 tcg_temp_free_i32(tmp);
7288 tmp = load_cpu_field(spsr);
7289 tcg_gen_addi_i32(addr, addr, 4);
7290 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7291 tcg_temp_free_i32(tmp);
7292 if (writeback) {
7293 switch (amode) {
7294 case 0:
7295 offset = -8;
7296 break;
7297 case 1:
7298 offset = 4;
7299 break;
7300 case 2:
7301 offset = -4;
7302 break;
7303 case 3:
7304 offset = 0;
7305 break;
7306 default:
7307 abort();
7309 tcg_gen_addi_i32(addr, addr, offset);
7310 tmp = tcg_const_i32(mode);
7311 gen_helper_set_r13_banked(cpu_env, tmp, addr);
7312 tcg_temp_free_i32(tmp);
7314 tcg_temp_free_i32(addr);
7315 s->base.is_jmp = DISAS_UPDATE;
7318 /* Generate a label used for skipping this instruction */
7319 static void arm_gen_condlabel(DisasContext *s)
7321 if (!s->condjmp) {
7322 s->condlabel = gen_new_label();
7323 s->condjmp = 1;
7327 /* Skip this instruction if the ARM condition is false */
7328 static void arm_skip_unless(DisasContext *s, uint32_t cond)
7330 arm_gen_condlabel(s);
7331 arm_gen_test_cc(cond ^ 1, s->condlabel);
7336 /* Constant expanders for the decoders. */
7339 static int negate(DisasContext *s, int x)
7341 return -x;
7344 static int plus_2(DisasContext *s, int x)
7346 return x + 2;
7349 static int times_2(DisasContext *s, int x)
7351 return x * 2;
7354 static int times_4(DisasContext *s, int x)
7356 return x * 4;
7359 /* Return only the rotation part of T32ExpandImm. */
7360 static int t32_expandimm_rot(DisasContext *s, int x)
7362 return x & 0xc00 ? extract32(x, 7, 5) : 0;
7365 /* Return the unrotated immediate from T32ExpandImm. */
7366 static int t32_expandimm_imm(DisasContext *s, int x)
7368 int imm = extract32(x, 0, 8);
7370 switch (extract32(x, 8, 4)) {
7371 case 0: /* XY */
7372 /* Nothing to do. */
7373 break;
7374 case 1: /* 00XY00XY */
7375 imm *= 0x00010001;
7376 break;
7377 case 2: /* XY00XY00 */
7378 imm *= 0x01000100;
7379 break;
7380 case 3: /* XYXYXYXY */
7381 imm *= 0x01010101;
7382 break;
7383 default:
7384 /* Rotated constant. */
7385 imm |= 0x80;
7386 break;
7388 return imm;
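/*
 * Worked examples for the two T32ExpandImm expanders above (derived from
 * the code, shown for reference):
 *   imm12 = 0x0ab -> imm 0x000000ab, rotation 0
 *   imm12 = 0x1ab -> imm 0x00ab00ab, rotation 0
 *   imm12 = 0x3ab -> imm 0xabababab, rotation 0
 *   imm12 = 0x4ff -> imm 0xff, rotation 9; the consumer of the immediate
 *                    (e.g. op_s_rri_rot) applies ror32(0xff, 9) == 0x7f800000.
 */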
7391 static int t32_branch24(DisasContext *s, int x)
7393 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
7394 x ^= !(x < 0) * (3 << 21);
7395 /* Append the final zero. */
7396 return x << 1;
7399 static int t16_setflags(DisasContext *s)
7401 return s->condexec_mask == 0;
7404 static int t16_push_list(DisasContext *s, int x)
7406 return (x & 0xff) | (x & 0x100) << (14 - 8);
7409 static int t16_pop_list(DisasContext *s, int x)
7411 return (x & 0xff) | (x & 0x100) << (15 - 8);
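/*
 * For example, a Thumb-1 PUSH/POP with register_list 0xff and the extra
 * bit set (x = 0x1ff) expands to 0x40ff for PUSH (r0-r7 plus lr) and
 * 0x80ff for POP (r0-r7 plus pc).
 */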
7415 /* Include the generated decoders. */
7418 #include "decode-a32.inc.c"
7419 #include "decode-a32-uncond.inc.c"
7420 #include "decode-t32.inc.c"
7421 #include "decode-t16.inc.c"
7423 /* Helpers to swap operands for reverse-subtract. */
7424 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7426 tcg_gen_sub_i32(dst, b, a);
7429 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7431 gen_sub_CC(dst, b, a);
7434 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7436 gen_sub_carry(dest, b, a);
7439 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7441 gen_sbc_CC(dest, b, a);
7445 /* Helpers for the data processing routines.
7447 * After the computation store the results back.
7448 * This may be suppressed altogether (STREG_NONE), require a runtime
7449 * check against the stack limits (STREG_SP_CHECK), or generate an
7450 * exception return. Oh, or store into a register.
7452 * Always return true, indicating success for a trans_* function. */
7454 typedef enum {
7455 STREG_NONE,
7456 STREG_NORMAL,
7457 STREG_SP_CHECK,
7458 STREG_EXC_RET,
7459 } StoreRegKind;
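/*
 * store_reg_kind() below dispatches on these values: STREG_NORMAL uses
 * store_reg()/store_reg_bx(), STREG_SP_CHECK routes through
 * store_sp_checked() (selected when the destination is SP), and
 * STREG_EXC_RET performs an exception return via gen_exception_return().
 */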
7461 static bool store_reg_kind(DisasContext *s, int rd,
7462 TCGv_i32 val, StoreRegKind kind)
7464 switch (kind) {
7465 case STREG_NONE:
7466 tcg_temp_free_i32(val);
7467 return true;
7468 case STREG_NORMAL:
7469 /* See ALUWritePC: Interworking only from a32 mode. */
7470 if (s->thumb) {
7471 store_reg(s, rd, val);
7472 } else {
7473 store_reg_bx(s, rd, val);
7475 return true;
7476 case STREG_SP_CHECK:
7477 store_sp_checked(s, val);
7478 return true;
7479 case STREG_EXC_RET:
7480 gen_exception_return(s, val);
7481 return true;
7483 g_assert_not_reached();
7487 /* Data Processing (register)
7489 * Operate, with set flags, one register source,
7490 * one immediate shifted register source, and a destination. */
7492 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
7493 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7494 int logic_cc, StoreRegKind kind)
7496 TCGv_i32 tmp1, tmp2;
7498 tmp2 = load_reg(s, a->rm);
7499 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7500 tmp1 = load_reg(s, a->rn);
7502 gen(tmp1, tmp1, tmp2);
7503 tcg_temp_free_i32(tmp2);
7505 if (logic_cc) {
7506 gen_logic_CC(tmp1);
7508 return store_reg_kind(s, a->rd, tmp1, kind);
7511 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
7512 void (*gen)(TCGv_i32, TCGv_i32),
7513 int logic_cc, StoreRegKind kind)
7515 TCGv_i32 tmp;
7517 tmp = load_reg(s, a->rm);
7518 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7520 gen(tmp, tmp);
7521 if (logic_cc) {
7522 gen_logic_CC(tmp);
7524 return store_reg_kind(s, a->rd, tmp, kind);
7528 /* Data-processing (register-shifted register)
7530 * Operate, with set flags, one register source,
7531 * one register shifted register source, and a destination. */
7533 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
7534 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7535 int logic_cc, StoreRegKind kind)
7537 TCGv_i32 tmp1, tmp2;
7539 tmp1 = load_reg(s, a->rs);
7540 tmp2 = load_reg(s, a->rm);
7541 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7542 tmp1 = load_reg(s, a->rn);
7544 gen(tmp1, tmp1, tmp2);
7545 tcg_temp_free_i32(tmp2);
7547 if (logic_cc) {
7548 gen_logic_CC(tmp1);
7550 return store_reg_kind(s, a->rd, tmp1, kind);
7553 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
7554 void (*gen)(TCGv_i32, TCGv_i32),
7555 int logic_cc, StoreRegKind kind)
7557 TCGv_i32 tmp1, tmp2;
7559 tmp1 = load_reg(s, a->rs);
7560 tmp2 = load_reg(s, a->rm);
7561 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7563 gen(tmp2, tmp2);
7564 if (logic_cc) {
7565 gen_logic_CC(tmp2);
7567 return store_reg_kind(s, a->rd, tmp2, kind);
7571 /* Data-processing (immediate)
7573 * Operate, with set flags, one register source,
7574 * one rotated immediate, and a destination.
7576 * Note that logic_cc && a->rot setting CF based on the msb of the
7577 * immediate is the reason why we must pass in the unrotated form
7578 * of the immediate. */
7580 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
7581 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7582 int logic_cc, StoreRegKind kind)
7584 TCGv_i32 tmp1, tmp2;
7585 uint32_t imm;
7587 imm = ror32(a->imm, a->rot);
7588 if (logic_cc && a->rot) {
7589 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7591 tmp2 = tcg_const_i32(imm);
7592 tmp1 = load_reg(s, a->rn);
7594 gen(tmp1, tmp1, tmp2);
7595 tcg_temp_free_i32(tmp2);
7597 if (logic_cc) {
7598 gen_logic_CC(tmp1);
7600 return store_reg_kind(s, a->rd, tmp1, kind);
7603 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
7604 void (*gen)(TCGv_i32, TCGv_i32),
7605 int logic_cc, StoreRegKind kind)
7607 TCGv_i32 tmp;
7608 uint32_t imm;
7610 imm = ror32(a->imm, a->rot);
7611 if (logic_cc && a->rot) {
7612 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7614 tmp = tcg_const_i32(imm);
7616 gen(tmp, tmp);
7617 if (logic_cc) {
7618 gen_logic_CC(tmp);
7620 return store_reg_kind(s, a->rd, tmp, kind);
7623 #define DO_ANY3(NAME, OP, L, K) \
7624 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7625 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7626 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7627 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
7628 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
7629 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7631 #define DO_ANY2(NAME, OP, L, K) \
7632 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
7633 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
7634 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
7635 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
7636 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
7637 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7639 #define DO_CMP2(NAME, OP, L) \
7640 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
7641 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
7642 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
7643 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
7644 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
7645 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
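/*
 * Each DO_ANY3 invocation below expands to three trans_* functions, e.g.
 * DO_ANY3(AND, ...) produces trans_AND_rrri, trans_AND_rrrr and
 * trans_AND_rri, wrapping op_s_rrr_shi(), op_s_rrr_shr() and
 * op_s_rri_rot() respectively; DO_ANY2 and DO_CMP2 follow the same
 * pattern for the single-source and compare-only forms.
 */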
7647 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7648 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7649 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7650 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7652 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7653 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7654 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7655 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7657 DO_CMP2(TST, tcg_gen_and_i32, true)
7658 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7659 DO_CMP2(CMN, gen_add_CC, false)
7660 DO_CMP2(CMP, gen_sub_CC, false)
7662 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7663 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7666 /* Note for the computation of StoreRegKind we return out of the
7667 * middle of the functions that are expanded by DO_ANY3, and that
7668 * we modify a->s via that parameter before it is used by OP. */
7670 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7672 StoreRegKind ret = STREG_NORMAL;
7673 if (a->rd == 15 && a->s) {
7675 /* See ALUExceptionReturn:
7676 * In User mode, UNPREDICTABLE; we choose UNDEF.
7677 * In Hyp mode, UNDEFINED. */
7679 if (IS_USER(s) || s->current_el == 2) {
7680 unallocated_encoding(s);
7681 return true;
7683 /* There is no writeback of nzcv to PSTATE. */
7684 a->s = 0;
7685 ret = STREG_EXC_RET;
7686 } else if (a->rd == 13 && a->rn == 13) {
7687 ret = STREG_SP_CHECK;
7689 ret;
7692 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7694 StoreRegKind ret = STREG_NORMAL;
7695 if (a->rd == 15 && a->s) {
7697 /* See ALUExceptionReturn:
7698 * In User mode, UNPREDICTABLE; we choose UNDEF.
7699 * In Hyp mode, UNDEFINED. */
7701 if (IS_USER(s) || s->current_el == 2) {
7702 unallocated_encoding(s);
7703 return true;
7705 /* There is no writeback of nzcv to PSTATE. */
7706 a->s = 0;
7707 ret = STREG_EXC_RET;
7708 } else if (a->rd == 13) {
7709 ret = STREG_SP_CHECK;
7711 ret;
7714 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7717 /* ORN is only available with T32, so there is no register-shifted-register
7718 * form of the insn. Using the DO_ANY3 macro would create an unused function. */
7720 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7722 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7725 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7727 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7730 #undef DO_ANY3
7731 #undef DO_ANY2
7732 #undef DO_CMP2
7734 static bool trans_ADR(DisasContext *s, arg_ri *a)
7736 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7737 return true;
7740 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7742 TCGv_i32 tmp;
7744 if (!ENABLE_ARCH_6T2) {
7745 return false;
7748 tmp = tcg_const_i32(a->imm);
7749 store_reg(s, a->rd, tmp);
7750 return true;
7753 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
7755 TCGv_i32 tmp;
7757 if (!ENABLE_ARCH_6T2) {
7758 return false;
7761 tmp = load_reg(s, a->rd);
7762 tcg_gen_ext16u_i32(tmp, tmp);
7763 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
7764 store_reg(s, a->rd, tmp);
7765 return true;
7769 /* Multiply and multiply accumulate */
7772 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
7774 TCGv_i32 t1, t2;
7776 t1 = load_reg(s, a->rn);
7777 t2 = load_reg(s, a->rm);
7778 tcg_gen_mul_i32(t1, t1, t2);
7779 tcg_temp_free_i32(t2);
7780 if (add) {
7781 t2 = load_reg(s, a->ra);
7782 tcg_gen_add_i32(t1, t1, t2);
7783 tcg_temp_free_i32(t2);
7785 if (a->s) {
7786 gen_logic_CC(t1);
7788 store_reg(s, a->rd, t1);
7789 return true;
7792 static bool trans_MUL(DisasContext *s, arg_MUL *a)
7794 return op_mla(s, a, false);
7797 static bool trans_MLA(DisasContext *s, arg_MLA *a)
7799 return op_mla(s, a, true);
7802 static bool trans_MLS(DisasContext *s, arg_MLS *a)
7804 TCGv_i32 t1, t2;
7806 if (!ENABLE_ARCH_6T2) {
7807 return false;
7809 t1 = load_reg(s, a->rn);
7810 t2 = load_reg(s, a->rm);
7811 tcg_gen_mul_i32(t1, t1, t2);
7812 tcg_temp_free_i32(t2);
7813 t2 = load_reg(s, a->ra);
7814 tcg_gen_sub_i32(t1, t2, t1);
7815 tcg_temp_free_i32(t2);
7816 store_reg(s, a->rd, t1);
7817 return true;
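/*
 * op_mlal() below is the common handler for UMULL/SMULL/UMLAL/SMLAL: the
 * 64-bit product (optionally accumulated with the existing ra:rd value)
 * is written back with the low half in ra and the high half in rd.
 */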
7820 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
7822 TCGv_i32 t0, t1, t2, t3;
7824 t0 = load_reg(s, a->rm);
7825 t1 = load_reg(s, a->rn);
7826 if (uns) {
7827 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7828 } else {
7829 tcg_gen_muls2_i32(t0, t1, t0, t1);
7831 if (add) {
7832 t2 = load_reg(s, a->ra);
7833 t3 = load_reg(s, a->rd);
7834 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
7835 tcg_temp_free_i32(t2);
7836 tcg_temp_free_i32(t3);
7838 if (a->s) {
7839 gen_logicq_cc(t0, t1);
7841 store_reg(s, a->ra, t0);
7842 store_reg(s, a->rd, t1);
7843 return true;
7846 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
7848 return op_mlal(s, a, true, false);
7851 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
7853 return op_mlal(s, a, false, false);
7856 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
7858 return op_mlal(s, a, true, true);
7861 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
7863 return op_mlal(s, a, false, true);
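/*
 * UMAAL: the 64-bit product of rn and rm plus the zero-extended values of
 * ra and rd, with the low half written back to ra and the high half to rd.
 */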
7866 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
7868 TCGv_i32 t0, t1, t2, zero;
7870 if (s->thumb
7871 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7872 : !ENABLE_ARCH_6) {
7873 return false;
7876 t0 = load_reg(s, a->rm);
7877 t1 = load_reg(s, a->rn);
7878 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7879 zero = tcg_const_i32(0);
7880 t2 = load_reg(s, a->ra);
7881 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7882 tcg_temp_free_i32(t2);
7883 t2 = load_reg(s, a->rd);
7884 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7885 tcg_temp_free_i32(t2);
7886 tcg_temp_free_i32(zero);
7887 store_reg(s, a->ra, t0);
7888 store_reg(s, a->rd, t1);
7889 return true;
7893 /* Saturating addition and subtraction */
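/*
 * op_qaddsub() implements QADD/QSUB as a saturating rm +/- rn; for
 * QDADD/QDSUB the doub flag first doubles rn with saturation before the
 * final saturating add or subtract.
 */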
7896 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
7898 TCGv_i32 t0, t1;
7900 if (s->thumb
7901 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7902 : !ENABLE_ARCH_5TE) {
7903 return false;
7906 t0 = load_reg(s, a->rm);
7907 t1 = load_reg(s, a->rn);
7908 if (doub) {
7909 gen_helper_add_saturate(t1, cpu_env, t1, t1);
7911 if (add) {
7912 gen_helper_add_saturate(t0, cpu_env, t0, t1);
7913 } else {
7914 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
7916 tcg_temp_free_i32(t1);
7917 store_reg(s, a->rd, t0);
7918 return true;
7921 #define DO_QADDSUB(NAME, ADD, DOUB) \
7922 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7924 return op_qaddsub(s, a, ADD, DOUB); \
7927 DO_QADDSUB(QADD, true, false)
7928 DO_QADDSUB(QSUB, false, false)
7929 DO_QADDSUB(QDADD, true, true)
7930 DO_QADDSUB(QDSUB, false, true)
7932 #undef DO_QADDSUB
7935 /* Halfword multiply and multiply accumulate */
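/*
 * In op_smlaxxx() below, add_long selects the accumulate form: 0 for the
 * SMULxy variants (no accumulate), 1 for SMLAxy (32-bit accumulate via
 * gen_helper_add_setq, which is expected to set the Q flag on overflow),
 * and 2 for SMLALxy (64-bit accumulate into ra:rd).
 */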
7938 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
7939 int add_long, bool nt, bool mt)
7941 TCGv_i32 t0, t1, tl, th;
7943 if (s->thumb
7944 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7945 : !ENABLE_ARCH_5TE) {
7946 return false;
7949 t0 = load_reg(s, a->rn);
7950 t1 = load_reg(s, a->rm);
7951 gen_mulxy(t0, t1, nt, mt);
7952 tcg_temp_free_i32(t1);
7954 switch (add_long) {
7955 case 0:
7956 store_reg(s, a->rd, t0);
7957 break;
7958 case 1:
7959 t1 = load_reg(s, a->ra);
7960 gen_helper_add_setq(t0, cpu_env, t0, t1);
7961 tcg_temp_free_i32(t1);
7962 store_reg(s, a->rd, t0);
7963 break;
7964 case 2:
7965 tl = load_reg(s, a->ra);
7966 th = load_reg(s, a->rd);
7967 /* Sign-extend the 32-bit product to 64 bits. */
7968 t1 = tcg_temp_new_i32();
7969 tcg_gen_sari_i32(t1, t0, 31);
7970 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
7971 tcg_temp_free_i32(t0);
7972 tcg_temp_free_i32(t1);
7973 store_reg(s, a->ra, tl);
7974 store_reg(s, a->rd, th);
7975 break;
7976 default:
7977 g_assert_not_reached();
7979 return true;
7982 #define DO_SMLAX(NAME, add, nt, mt) \
7983 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
7985 return op_smlaxxx(s, a, add, nt, mt); \
7988 DO_SMLAX(SMULBB, 0, 0, 0)
7989 DO_SMLAX(SMULBT, 0, 0, 1)
7990 DO_SMLAX(SMULTB, 0, 1, 0)
7991 DO_SMLAX(SMULTT, 0, 1, 1)
7993 DO_SMLAX(SMLABB, 1, 0, 0)
7994 DO_SMLAX(SMLABT, 1, 0, 1)
7995 DO_SMLAX(SMLATB, 1, 1, 0)
7996 DO_SMLAX(SMLATT, 1, 1, 1)
7998 DO_SMLAX(SMLALBB, 2, 0, 0)
7999 DO_SMLAX(SMLALBT, 2, 0, 1)
8000 DO_SMLAX(SMLALTB, 2, 1, 0)
8001 DO_SMLAX(SMLALTT, 2, 1, 1)
8003 #undef DO_SMLAX
8005 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
8007 TCGv_i32 t0, t1;
8009 if (!ENABLE_ARCH_5TE) {
8010 return false;
8013 t0 = load_reg(s, a->rn);
8014 t1 = load_reg(s, a->rm);
8016 /* Since the nominal result is product<47:16>, shift the 16-bit
8017 * input up by 16 bits, so that the result is at product<63:32>. */
8019 if (mt) {
8020 tcg_gen_andi_i32(t1, t1, 0xffff0000);
8021 } else {
8022 tcg_gen_shli_i32(t1, t1, 16);
8024 tcg_gen_muls2_i32(t0, t1, t0, t1);
8025 tcg_temp_free_i32(t0);
8026 if (add) {
8027 t0 = load_reg(s, a->ra);
8028 gen_helper_add_setq(t1, cpu_env, t1, t0);
8029 tcg_temp_free_i32(t0);
8031 store_reg(s, a->rd, t1);
8032 return true;
8035 #define DO_SMLAWX(NAME, add, mt) \
8036 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
8038 return op_smlawx(s, a, add, mt); \
8041 DO_SMLAWX(SMULWB, 0, 0)
8042 DO_SMLAWX(SMULWT, 0, 1)
8043 DO_SMLAWX(SMLAWB, 1, 0)
8044 DO_SMLAWX(SMLAWT, 1, 1)
8046 #undef DO_SMLAWX
8049 /* MSR (immediate) and hints */
8052 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
8055 /* When running single-threaded TCG code, use the helper to ensure that
8056 * the next round-robin scheduled vCPU gets a crack. When running in
8057 * MTTCG we don't generate jumps to the helper as it won't affect the
8058 * scheduling of other vCPUs. */
8060 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8061 gen_set_pc_im(s, s->base.pc_next);
8062 s->base.is_jmp = DISAS_YIELD;
8064 return true;
8067 static bool trans_WFE(DisasContext *s, arg_WFE *a)
8070 /* When running single-threaded TCG code, use the helper to ensure that
8071 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
8072 * just skip this instruction. Currently the SEV/SEVL instructions,
8073 * which are *one* of many ways to wake the CPU from WFE, are not
8074 * implemented so we can't sleep like WFI does. */
8076 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8077 gen_set_pc_im(s, s->base.pc_next);
8078 s->base.is_jmp = DISAS_WFE;
8080 return true;
8083 static bool trans_WFI(DisasContext *s, arg_WFI *a)
8085 /* For WFI, halt the vCPU until an IRQ. */
8086 gen_set_pc_im(s, s->base.pc_next);
8087 s->base.is_jmp = DISAS_WFI;
8088 return true;
8091 static bool trans_NOP(DisasContext *s, arg_NOP *a)
8093 return true;
8096 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
8098 uint32_t val = ror32(a->imm, a->rot * 2);
8099 uint32_t mask = msr_mask(s, a->mask, a->r);
8101 if (gen_set_psr_im(s, mask, a->r, val)) {
8102 unallocated_encoding(s);
8104 return true;
8108 /* Cyclic Redundancy Check */
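/*
 * op_crc32() below zero-extends the rm input to the operand size and
 * passes the byte count (1 << sz) to the helper; the plain variants use
 * the CRC-32 helper and the CRC32C* variants the CRC-32C (Castagnoli)
 * helper.
 */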
8111 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
8113 TCGv_i32 t1, t2, t3;
8115 if (!dc_isar_feature(aa32_crc32, s)) {
8116 return false;
8119 t1 = load_reg(s, a->rn);
8120 t2 = load_reg(s, a->rm);
8121 switch (sz) {
8122 case MO_8:
8123 gen_uxtb(t2);
8124 break;
8125 case MO_16:
8126 gen_uxth(t2);
8127 break;
8128 case MO_32:
8129 break;
8130 default:
8131 g_assert_not_reached();
8133 t3 = tcg_const_i32(1 << sz);
8134 if (c) {
8135 gen_helper_crc32c(t1, t1, t2, t3);
8136 } else {
8137 gen_helper_crc32(t1, t1, t2, t3);
8139 tcg_temp_free_i32(t2);
8140 tcg_temp_free_i32(t3);
8141 store_reg(s, a->rd, t1);
8142 return true;
8145 #define DO_CRC32(NAME, c, sz) \
8146 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8147 { return op_crc32(s, a, c, sz); }
8149 DO_CRC32(CRC32B, false, MO_8)
8150 DO_CRC32(CRC32H, false, MO_16)
8151 DO_CRC32(CRC32W, false, MO_32)
8152 DO_CRC32(CRC32CB, true, MO_8)
8153 DO_CRC32(CRC32CH, true, MO_16)
8154 DO_CRC32(CRC32CW, true, MO_32)
8156 #undef DO_CRC32
8159 /* Miscellaneous instructions */
8162 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
8164 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8165 return false;
8167 gen_mrs_banked(s, a->r, a->sysm, a->rd);
8168 return true;
8171 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
8173 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8174 return false;
8176 gen_msr_banked(s, a->r, a->sysm, a->rn);
8177 return true;
8180 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
8182 TCGv_i32 tmp;
8184 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8185 return false;
8187 if (a->r) {
8188 if (IS_USER(s)) {
8189 unallocated_encoding(s);
8190 return true;
8192 tmp = load_cpu_field(spsr);
8193 } else {
8194 tmp = tcg_temp_new_i32();
8195 gen_helper_cpsr_read(tmp, cpu_env);
8197 store_reg(s, a->rd, tmp);
8198 return true;
8201 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
8203 TCGv_i32 tmp;
8204 uint32_t mask = msr_mask(s, a->mask, a->r);
8206 if (arm_dc_feature(s, ARM_FEATURE_M)) {
8207 return false;
8209 tmp = load_reg(s, a->rn);
8210 if (gen_set_psr(s, mask, a->r, tmp)) {
8211 unallocated_encoding(s);
8213 return true;
8216 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
8218 TCGv_i32 tmp;
8220 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8221 return false;
8223 tmp = tcg_const_i32(a->sysm);
8224 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8225 store_reg(s, a->rd, tmp);
8226 return true;
8229 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
8231 TCGv_i32 addr, reg;
8233 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8234 return false;
8236 addr = tcg_const_i32((a->mask << 10) | a->sysm);
8237 reg = load_reg(s, a->rn);
8238 gen_helper_v7m_msr(cpu_env, addr, reg);
8239 tcg_temp_free_i32(addr);
8240 tcg_temp_free_i32(reg);
8241 /* If we wrote to CONTROL, the EL might have changed */
8242 gen_helper_rebuild_hflags_m32_newel(cpu_env);
8243 gen_lookup_tb(s);
8244 return true;
8247 static bool trans_BX(DisasContext *s, arg_BX *a)
8249 if (!ENABLE_ARCH_4T) {
8250 return false;
8252 gen_bx_excret(s, load_reg(s, a->rm));
8253 return true;
8256 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
8258 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
8259 return false;
8261 /* Trivial implementation equivalent to bx. */
8262 gen_bx(s, load_reg(s, a->rm));
8263 return true;
8266 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
8268 TCGv_i32 tmp;
8270 if (!ENABLE_ARCH_5) {
8271 return false;
8273 tmp = load_reg(s, a->rm);
8274 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
8275 gen_bx(s, tmp);
8276 return true;
8280 /* BXNS/BLXNS: only exist for v8M with the security extensions,
8281 * and always UNDEF if NonSecure. We don't implement these in
8282 * the user-only mode either (in theory you can use them from
8283 * Secure User mode but they are too tied in to system emulation). */
8285 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
8287 if (!s->v8m_secure || IS_USER_ONLY) {
8288 unallocated_encoding(s);
8289 } else {
8290 gen_bxns(s, a->rm);
8292 return true;
8295 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
8297 if (!s->v8m_secure || IS_USER_ONLY) {
8298 unallocated_encoding(s);
8299 } else {
8300 gen_blxns(s, a->rm);
8302 return true;
8305 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
8307 TCGv_i32 tmp;
8309 if (!ENABLE_ARCH_5) {
8310 return false;
8312 tmp = load_reg(s, a->rm);
8313 tcg_gen_clzi_i32(tmp, tmp, 32);
8314 store_reg(s, a->rd, tmp);
8315 return true;
8318 static bool trans_ERET(DisasContext *s, arg_ERET *a)
8320 TCGv_i32 tmp;
8322 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8323 return false;
8325 if (IS_USER(s)) {
8326 unallocated_encoding(s);
8327 return true;
8329 if (s->current_el == 2) {
8330 /* ERET from Hyp uses ELR_Hyp, not LR */
8331 tmp = load_cpu_field(elr_el[2]);
8332 } else {
8333 tmp = load_reg(s, 14);
8335 gen_exception_return(s, tmp);
8336 return true;
8339 static bool trans_HLT(DisasContext *s, arg_HLT *a)
8341 gen_hlt(s, a->imm);
8342 return true;
8345 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
8347 if (!ENABLE_ARCH_5) {
8348 return false;
8350 if (arm_dc_feature(s, ARM_FEATURE_M) &&
8351 semihosting_enabled() &&
8352 #ifndef CONFIG_USER_ONLY
8353 !IS_USER(s) &&
8354 #endif
8355 (a->imm == 0xab)) {
8356 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
8357 } else {
8358 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8360 return true;
8363 static bool trans_HVC(DisasContext *s, arg_HVC *a)
8365 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
8366 return false;
8368 if (IS_USER(s)) {
8369 unallocated_encoding(s);
8370 } else {
8371 gen_hvc(s, a->imm);
8373 return true;
8376 static bool trans_SMC(DisasContext *s, arg_SMC *a)
8378 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
8379 return false;
8381 if (IS_USER(s)) {
8382 unallocated_encoding(s);
8383 } else {
8384 gen_smc(s);
8386 return true;
8389 static bool trans_SG(DisasContext *s, arg_SG *a)
8391 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8392 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8393 return false;
8396 * SG (v8M only)
8397 * The bulk of the behaviour for this instruction is implemented
8398 * in v7m_handle_execute_nsc(), which deals with the insn when
8399 * it is executed by a CPU in non-secure state from memory
8400 * which is Secure & NonSecure-Callable.
8401 * Here we only need to handle the remaining cases:
8402 * * in NS memory (including the "security extension not
8403 * implemented" case) : NOP
8404 * * in S memory but CPU already secure (clear IT bits)
8405 * We know that the attribute for the memory this insn is
8406 * in must match the current CPU state, because otherwise
8407 * get_phys_addr_pmsav8 would have generated an exception.
8409 if (s->v8m_secure) {
8410 /* Like the IT insn, we don't need to generate any code */
8411 s->condexec_cond = 0;
8412 s->condexec_mask = 0;
8414 return true;
8417 static bool trans_TT(DisasContext *s, arg_TT *a)
8419 TCGv_i32 addr, tmp;
8421 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
8422 !arm_dc_feature(s, ARM_FEATURE_V8)) {
8423 return false;
8425 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
8426 /* We UNDEF for these UNPREDICTABLE cases */
8427 unallocated_encoding(s);
8428 return true;
8430 if (a->A && !s->v8m_secure) {
8431 /* This case is UNDEFINED. */
8432 unallocated_encoding(s);
8433 return true;
8436 addr = load_reg(s, a->rn);
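/*
 * The low two bits passed to the helper select the TT variant:
 * bit 1 is A (alternate security state), bit 0 is T (unprivileged).
 */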
8437 tmp = tcg_const_i32((a->A << 1) | a->T);
8438 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8439 tcg_temp_free_i32(addr);
8440 store_reg(s, a->rd, tmp);
8441 return true;
8445 * Load/store register index
8448 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
8450 ISSInfo ret;
8452 /* ISS not valid if writeback */
8453 if (p && !w) {
8454 ret = rd;
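/*
 * An insn length of 2 bytes means this was a 16-bit Thumb encoding,
 * which is reported in the syndrome via ISSIs16Bit.
 */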
8455 if (s->base.pc_next - s->pc_curr == 2) {
8456 ret |= ISSIs16Bit;
8458 } else {
8459 ret = ISSInvalid;
8461 return ret;
8464 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
8466 TCGv_i32 addr = load_reg(s, a->rn);
8468 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8469 gen_helper_v8m_stackcheck(cpu_env, addr);
8472 if (a->p) {
8473 TCGv_i32 ofs = load_reg(s, a->rm);
8474 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8475 if (a->u) {
8476 tcg_gen_add_i32(addr, addr, ofs);
8477 } else {
8478 tcg_gen_sub_i32(addr, addr, ofs);
8480 tcg_temp_free_i32(ofs);
8482 return addr;
8485 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
8486 TCGv_i32 addr, int address_offset)
8488 if (!a->p) {
8489 TCGv_i32 ofs = load_reg(s, a->rm);
8490 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
8491 if (a->u) {
8492 tcg_gen_add_i32(addr, addr, ofs);
8493 } else {
8494 tcg_gen_sub_i32(addr, addr, ofs);
8496 tcg_temp_free_i32(ofs);
8497 } else if (!a->w) {
8498 tcg_temp_free_i32(addr);
8499 return;
8501 tcg_gen_addi_i32(addr, addr, address_offset);
8502 store_reg(s, a->rn, addr);
8505 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
8506 MemOp mop, int mem_idx)
8508 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8509 TCGv_i32 addr, tmp;
8511 addr = op_addr_rr_pre(s, a);
8513 tmp = tcg_temp_new_i32();
8514 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8515 disas_set_da_iss(s, mop, issinfo);
8518 * Perform base writeback before the loaded value to
8519 * ensure correct behavior with overlapping index registers.
8521 op_addr_rr_post(s, a, addr, 0);
8522 store_reg_from_load(s, a->rt, tmp);
8523 return true;
8526 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
8527 MemOp mop, int mem_idx)
8529 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8530 TCGv_i32 addr, tmp;
8532 addr = op_addr_rr_pre(s, a);
8534 tmp = load_reg(s, a->rt);
8535 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8536 disas_set_da_iss(s, mop, issinfo);
8537 tcg_temp_free_i32(tmp);
8539 op_addr_rr_post(s, a, addr, 0);
8540 return true;
8543 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
8545 int mem_idx = get_mem_index(s);
8546 TCGv_i32 addr, tmp;
8548 if (!ENABLE_ARCH_5TE) {
8549 return false;
8551 if (a->rt & 1) {
8552 unallocated_encoding(s);
8553 return true;
8555 addr = op_addr_rr_pre(s, a);
8557 tmp = tcg_temp_new_i32();
8558 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8559 store_reg(s, a->rt, tmp);
8561 tcg_gen_addi_i32(addr, addr, 4);
8563 tmp = tcg_temp_new_i32();
8564 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8565 store_reg(s, a->rt + 1, tmp);
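/*
 * addr was advanced by 4 for the second word, so pass -4 below to undo
 * that before any base writeback.
 */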
8567 /* LDRD w/ base writeback is undefined if the registers overlap. */
8568 op_addr_rr_post(s, a, addr, -4);
8569 return true;
8572 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
8574 int mem_idx = get_mem_index(s);
8575 TCGv_i32 addr, tmp;
8577 if (!ENABLE_ARCH_5TE) {
8578 return false;
8580 if (a->rt & 1) {
8581 unallocated_encoding(s);
8582 return true;
8584 addr = op_addr_rr_pre(s, a);
8586 tmp = load_reg(s, a->rt);
8587 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8588 tcg_temp_free_i32(tmp);
8590 tcg_gen_addi_i32(addr, addr, 4);
8592 tmp = load_reg(s, a->rt + 1);
8593 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8594 tcg_temp_free_i32(tmp);
8596 op_addr_rr_post(s, a, addr, -4);
8597 return true;
8601 * Load/store immediate index
8604 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8606 int ofs = a->imm;
8608 if (!a->u) {
8609 ofs = -ofs;
8612 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8614 * Stackcheck. Here we know 'addr' is the current SP;
8615 * U is set if we're moving SP up, else down. It is
8616 * UNKNOWN whether the limit check triggers when SP starts
8617 * below the limit and ends up above it; we chose to do so.
8619 if (!a->u) {
8620 TCGv_i32 newsp = tcg_temp_new_i32();
8621 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8622 gen_helper_v8m_stackcheck(cpu_env, newsp);
8623 tcg_temp_free_i32(newsp);
8624 } else {
8625 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
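/*
 * add_reg_for_lit() returns Rn plus the offset, using the word-aligned PC
 * when Rn is r15 (literal addressing); for post-indexed forms (!a->p) the
 * offset is applied later in op_addr_ri_post().
 */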
8629 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
8632 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8633 TCGv_i32 addr, int address_offset)
8635 if (!a->p) {
8636 if (a->u) {
8637 address_offset += a->imm;
8638 } else {
8639 address_offset -= a->imm;
8641 } else if (!a->w) {
8642 tcg_temp_free_i32(addr);
8643 return;
8645 tcg_gen_addi_i32(addr, addr, address_offset);
8646 store_reg(s, a->rn, addr);
8649 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8650 MemOp mop, int mem_idx)
8652 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8653 TCGv_i32 addr, tmp;
8655 addr = op_addr_ri_pre(s, a);
8657 tmp = tcg_temp_new_i32();
8658 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8659 disas_set_da_iss(s, mop, issinfo);
8662 * Perform base writeback before the loaded value to
8663 * ensure correct behavior with overlapping index registers.
8665 op_addr_ri_post(s, a, addr, 0);
8666 store_reg_from_load(s, a->rt, tmp);
8667 return true;
8670 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8671 MemOp mop, int mem_idx)
8673 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8674 TCGv_i32 addr, tmp;
8676 addr = op_addr_ri_pre(s, a);
8678 tmp = load_reg(s, a->rt);
8679 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8680 disas_set_da_iss(s, mop, issinfo);
8681 tcg_temp_free_i32(tmp);
8683 op_addr_ri_post(s, a, addr, 0);
8684 return true;
8687 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8689 int mem_idx = get_mem_index(s);
8690 TCGv_i32 addr, tmp;
8692 addr = op_addr_ri_pre(s, a);
8694 tmp = tcg_temp_new_i32();
8695 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8696 store_reg(s, a->rt, tmp);
8698 tcg_gen_addi_i32(addr, addr, 4);
8700 tmp = tcg_temp_new_i32();
8701 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8702 store_reg(s, rt2, tmp);
8704 /* LDRD w/ base writeback is undefined if the registers overlap. */
8705 op_addr_ri_post(s, a, addr, -4);
8706 return true;
8709 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8711 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8712 return false;
8714 return op_ldrd_ri(s, a, a->rt + 1);
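/*
 * The T32 encoding carries Rt2 explicitly, whereas A32 LDRD requires an
 * even Rt and implies Rt+1; repack the t32 arguments into an arg_ldst_ri
 * and pass rt2 through separately.
 */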
8717 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8719 arg_ldst_ri b = {
8720 .u = a->u, .w = a->w, .p = a->p,
8721 .rn = a->rn, .rt = a->rt, .imm = a->imm
8723 return op_ldrd_ri(s, &b, a->rt2);
8726 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8728 int mem_idx = get_mem_index(s);
8729 TCGv_i32 addr, tmp;
8731 addr = op_addr_ri_pre(s, a);
8733 tmp = load_reg(s, a->rt);
8734 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8735 tcg_temp_free_i32(tmp);
8737 tcg_gen_addi_i32(addr, addr, 4);
8739 tmp = load_reg(s, rt2);
8740 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8741 tcg_temp_free_i32(tmp);
8743 op_addr_ri_post(s, a, addr, -4);
8744 return true;
8747 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8749 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8750 return false;
8752 return op_strd_ri(s, a, a->rt + 1);
8755 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8757 arg_ldst_ri b = {
8758 .u = a->u, .w = a->w, .p = a->p,
8759 .rn = a->rn, .rt = a->rt, .imm = a->imm
8761 return op_strd_ri(s, &b, a->rt2);
8764 #define DO_LDST(NAME, WHICH, MEMOP) \
8765 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
8767 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
8769 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
8771 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
8773 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
8775 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
8777 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
8779 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
8782 DO_LDST(LDR, load, MO_UL)
8783 DO_LDST(LDRB, load, MO_UB)
8784 DO_LDST(LDRH, load, MO_UW)
8785 DO_LDST(LDRSB, load, MO_SB)
8786 DO_LDST(LDRSH, load, MO_SW)
8788 DO_LDST(STR, store, MO_UL)
8789 DO_LDST(STRB, store, MO_UB)
8790 DO_LDST(STRH, store, MO_UW)
8792 #undef DO_LDST
8795 * Synchronization primitives
8798 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
8800 TCGv_i32 addr, tmp;
8801 TCGv taddr;
8803 opc |= s->be_data;
8804 addr = load_reg(s, a->rn);
8805 taddr = gen_aa32_addr(s, addr, opc);
8806 tcg_temp_free_i32(addr);
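/*
 * SWP{B} Rt, Rt2, [Rn] loads from [Rn] into Rt while storing Rt2 to the
 * same location; model it as a single atomic exchange.
 */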
8808 tmp = load_reg(s, a->rt2);
8809 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
8810 tcg_temp_free(taddr);
8812 store_reg(s, a->rt, tmp);
8813 return true;
8816 static bool trans_SWP(DisasContext *s, arg_SWP *a)
8818 return op_swp(s, a, MO_UL | MO_ALIGN);
8821 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
8823 return op_swp(s, a, MO_UB);
8827 * Load/Store Exclusive and Load-Acquire/Store-Release
8830 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
8832 TCGv_i32 addr;
8833 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8834 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8836 /* We UNDEF for these UNPREDICTABLE cases. */
8837 if (a->rd == 15 || a->rn == 15 || a->rt == 15
8838 || a->rd == a->rn || a->rd == a->rt
8839 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
8840 || (mop == MO_64
8841 && (a->rt2 == 15
8842 || a->rd == a->rt2
8843 || (!v8a && s->thumb && a->rt2 == 13)))) {
8844 unallocated_encoding(s);
8845 return true;
8848 if (rel) {
8849 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
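/*
 * addr must be a local temp: gen_store_exclusive() emits branches for the
 * exclusive-address comparison, and ordinary TCG temps are not guaranteed
 * to hold their value across a branch.
 */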
8852 addr = tcg_temp_local_new_i32();
8853 load_reg_var(s, addr, a->rn);
8854 tcg_gen_addi_i32(addr, addr, a->imm);
8856 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
8857 tcg_temp_free_i32(addr);
8858 return true;
8861 static bool trans_STREX(DisasContext *s, arg_STREX *a)
8863 if (!ENABLE_ARCH_6) {
8864 return false;
8866 return op_strex(s, a, MO_32, false);
8869 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
8871 if (!ENABLE_ARCH_6K) {
8872 return false;
8874 /* We UNDEF for these UNPREDICTABLE cases. */
8875 if (a->rt & 1) {
8876 unallocated_encoding(s);
8877 return true;
8879 a->rt2 = a->rt + 1;
8880 return op_strex(s, a, MO_64, false);
8883 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
8885 return op_strex(s, a, MO_64, false);
8888 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
8890 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8891 return false;
8893 return op_strex(s, a, MO_8, false);
8896 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
8898 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8899 return false;
8901 return op_strex(s, a, MO_16, false);
8904 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
8906 if (!ENABLE_ARCH_8) {
8907 return false;
8909 return op_strex(s, a, MO_32, true);
8912 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
8914 if (!ENABLE_ARCH_8) {
8915 return false;
8917 /* We UNDEF for these UNPREDICTABLE cases. */
8918 if (a->rt & 1) {
8919 unallocated_encoding(s);
8920 return true;
8922 a->rt2 = a->rt + 1;
8923 return op_strex(s, a, MO_64, true);
8926 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
8928 if (!ENABLE_ARCH_8) {
8929 return false;
8931 return op_strex(s, a, MO_64, true);
8934 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
8936 if (!ENABLE_ARCH_8) {
8937 return false;
8939 return op_strex(s, a, MO_8, true);
8942 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
8944 if (!ENABLE_ARCH_8) {
8945 return false;
8947 return op_strex(s, a, MO_16, true);
8950 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
8952 TCGv_i32 addr, tmp;
8954 if (!ENABLE_ARCH_8) {
8955 return false;
8957 /* We UNDEF for these UNPREDICTABLE cases. */
8958 if (a->rn == 15 || a->rt == 15) {
8959 unallocated_encoding(s);
8960 return true;
8963 addr = load_reg(s, a->rn);
8964 tmp = load_reg(s, a->rt);
8965 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8966 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
8967 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
8969 tcg_temp_free_i32(tmp);
8970 tcg_temp_free_i32(addr);
8971 return true;
8974 static bool trans_STL(DisasContext *s, arg_STL *a)
8976 return op_stl(s, a, MO_UL);
8979 static bool trans_STLB(DisasContext *s, arg_STL *a)
8981 return op_stl(s, a, MO_UB);
8984 static bool trans_STLH(DisasContext *s, arg_STL *a)
8986 return op_stl(s, a, MO_UW);
8989 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
8991 TCGv_i32 addr;
8992 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8993 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8995 /* We UNDEF for these UNPREDICTABLE cases. */
8996 if (a->rn == 15 || a->rt == 15
8997 || (!v8a && s->thumb && a->rt == 13)
8998 || (mop == MO_64
8999 && (a->rt2 == 15 || a->rt == a->rt2
9000 || (!v8a && s->thumb && a->rt2 == 13)))) {
9001 unallocated_encoding(s);
9002 return true;
9005 addr = tcg_temp_local_new_i32();
9006 load_reg_var(s, addr, a->rn);
9007 tcg_gen_addi_i32(addr, addr, a->imm);
9009 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9010 tcg_temp_free_i32(addr);
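/*
 * For the acquire variants (LDAEX*), the barrier is emitted after the
 * load, mirroring the release barrier before the store in op_strex().
 */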
9012 if (acq) {
9013 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
9015 return true;
9018 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
9020 if (!ENABLE_ARCH_6) {
9021 return false;
9023 return op_ldrex(s, a, MO_32, false);
9026 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
9028 if (!ENABLE_ARCH_6K) {
9029 return false;
9031 /* We UNDEF for these UNPREDICTABLE cases. */
9032 if (a->rt & 1) {
9033 unallocated_encoding(s);
9034 return true;
9036 a->rt2 = a->rt + 1;
9037 return op_ldrex(s, a, MO_64, false);
9040 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
9042 return op_ldrex(s, a, MO_64, false);
9045 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
9047 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9048 return false;
9050 return op_ldrex(s, a, MO_8, false);
9053 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
9055 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9056 return false;
9058 return op_ldrex(s, a, MO_16, false);
9061 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
9063 if (!ENABLE_ARCH_8) {
9064 return false;
9066 return op_ldrex(s, a, MO_32, true);
9069 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
9071 if (!ENABLE_ARCH_8) {
9072 return false;
9074 /* We UNDEF for these UNPREDICTABLE cases. */
9075 if (a->rt & 1) {
9076 unallocated_encoding(s);
9077 return true;
9079 a->rt2 = a->rt + 1;
9080 return op_ldrex(s, a, MO_64, true);
9083 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
9085 if (!ENABLE_ARCH_8) {
9086 return false;
9088 return op_ldrex(s, a, MO_64, true);
9091 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
9093 if (!ENABLE_ARCH_8) {
9094 return false;
9096 return op_ldrex(s, a, MO_8, true);
9099 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
9101 if (!ENABLE_ARCH_8) {
9102 return false;
9104 return op_ldrex(s, a, MO_16, true);
9107 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
9109 TCGv_i32 addr, tmp;
9111 if (!ENABLE_ARCH_8) {
9112 return false;
9114 /* We UNDEF for these UNPREDICTABLE cases. */
9115 if (a->rn == 15 || a->rt == 15) {
9116 unallocated_encoding(s);
9117 return true;
9120 addr = load_reg(s, a->rn);
9121 tmp = tcg_temp_new_i32();
9122 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
9123 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
9124 tcg_temp_free_i32(addr);
9126 store_reg(s, a->rt, tmp);
9127 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
9128 return true;
9131 static bool trans_LDA(DisasContext *s, arg_LDA *a)
9133 return op_lda(s, a, MO_UL);
9136 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
9138 return op_lda(s, a, MO_UB);
9141 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
9143 return op_lda(s, a, MO_UW);
9147 * Media instructions
9150 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
9152 TCGv_i32 t1, t2;
9154 if (!ENABLE_ARCH_6) {
9155 return false;
9158 t1 = load_reg(s, a->rn);
9159 t2 = load_reg(s, a->rm);
9160 gen_helper_usad8(t1, t1, t2);
9161 tcg_temp_free_i32(t2);
9162 if (a->ra != 15) {
9163 t2 = load_reg(s, a->ra);
9164 tcg_gen_add_i32(t1, t1, t2);
9165 tcg_temp_free_i32(t2);
9167 store_reg(s, a->rd, t1);
9168 return true;
9171 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
9173 TCGv_i32 tmp;
9174 int width = a->widthm1 + 1;
9175 int shift = a->lsb;
9177 if (!ENABLE_ARCH_6T2) {
9178 return false;
9180 if (shift + width > 32) {
9181 /* UNPREDICTABLE; we choose to UNDEF */
9182 unallocated_encoding(s);
9183 return true;
9186 tmp = load_reg(s, a->rn);
9187 if (u) {
9188 tcg_gen_extract_i32(tmp, tmp, shift, width);
9189 } else {
9190 tcg_gen_sextract_i32(tmp, tmp, shift, width);
9192 store_reg(s, a->rd, tmp);
9193 return true;
9196 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
9198 return op_bfx(s, a, false);
9201 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
9203 return op_bfx(s, a, true);
9206 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
9208 TCGv_i32 tmp;
9209 int msb = a->msb, lsb = a->lsb;
9210 int width;
9212 if (!ENABLE_ARCH_6T2) {
9213 return false;
9215 if (msb < lsb) {
9216 /* UNPREDICTABLE; we choose to UNDEF */
9217 unallocated_encoding(s);
9218 return true;
9221 width = msb + 1 - lsb;
9222 if (a->rn == 15) {
9223 /* BFC */
9224 tmp = tcg_const_i32(0);
9225 } else {
9226 /* BFI */
9227 tmp = load_reg(s, a->rn);
9229 if (width != 32) {
9230 TCGv_i32 tmp2 = load_reg(s, a->rd);
9231 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9232 tcg_temp_free_i32(tmp2);
9234 store_reg(s, a->rd, tmp);
9235 return true;
9238 static bool trans_UDF(DisasContext *s, arg_UDF *a)
9240 unallocated_encoding(s);
9241 return true;
9245 * Parallel addition and subtraction
9248 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
9249 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9251 TCGv_i32 t0, t1;
9253 if (s->thumb
9254 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9255 : !ENABLE_ARCH_6) {
9256 return false;
9259 t0 = load_reg(s, a->rn);
9260 t1 = load_reg(s, a->rm);
9262 gen(t0, t0, t1);
9264 tcg_temp_free_i32(t1);
9265 store_reg(s, a->rd, t0);
9266 return true;
9269 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
9270 void (*gen)(TCGv_i32, TCGv_i32,
9271 TCGv_i32, TCGv_ptr))
9273 TCGv_i32 t0, t1;
9274 TCGv_ptr ge;
9276 if (s->thumb
9277 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9278 : !ENABLE_ARCH_6) {
9279 return false;
9282 t0 = load_reg(s, a->rn);
9283 t1 = load_reg(s, a->rm);
9285 ge = tcg_temp_new_ptr();
9286 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9287 gen(t0, t0, t1, ge);
9289 tcg_temp_free_ptr(ge);
9290 tcg_temp_free_i32(t1);
9291 store_reg(s, a->rd, t0);
9292 return true;
9295 #define DO_PAR_ADDSUB(NAME, helper) \
9296 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9298 return op_par_addsub(s, a, helper); \
9301 #define DO_PAR_ADDSUB_GE(NAME, helper) \
9302 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
9304 return op_par_addsub_ge(s, a, helper); \
9307 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9308 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9309 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9310 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9311 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9312 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9314 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9315 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9316 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9317 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9318 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9319 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9321 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9322 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9323 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9324 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9325 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9326 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9328 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9329 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9330 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9331 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9332 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9333 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9335 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9336 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9337 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9338 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9339 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9340 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9342 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9343 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9344 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9345 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9346 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9347 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9349 #undef DO_PAR_ADDSUB
9350 #undef DO_PAR_ADDSUB_GE
9353 * Packing, unpacking, saturation, and reversal
9356 static bool trans_PKH(DisasContext *s, arg_PKH *a)
9358 TCGv_i32 tn, tm;
9359 int shift = a->imm;
9361 if (s->thumb
9362 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9363 : !ENABLE_ARCH_6) {
9364 return false;
9367 tn = load_reg(s, a->rn);
9368 tm = load_reg(s, a->rm);
9369 if (a->tb) {
9370 /* PKHTB */
9371 if (shift == 0) {
9372 shift = 31;
9374 tcg_gen_sari_i32(tm, tm, shift);
9375 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
9376 } else {
9377 /* PKHBT */
9378 tcg_gen_shli_i32(tm, tm, shift);
9379 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
9381 tcg_temp_free_i32(tm);
9382 store_reg(s, a->rd, tn);
9383 return true;
9386 static bool op_sat(DisasContext *s, arg_sat *a,
9387 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9389 TCGv_i32 tmp, satimm;
9390 int shift = a->imm;
9392 if (!ENABLE_ARCH_6) {
9393 return false;
9396 tmp = load_reg(s, a->rn);
9397 if (a->sh) {
9398 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
9399 } else {
9400 tcg_gen_shli_i32(tmp, tmp, shift);
9403 satimm = tcg_const_i32(a->satimm);
9404 gen(tmp, cpu_env, tmp, satimm);
9405 tcg_temp_free_i32(satimm);
9407 store_reg(s, a->rd, tmp);
9408 return true;
9411 static bool trans_SSAT(DisasContext *s, arg_sat *a)
9413 return op_sat(s, a, gen_helper_ssat);
9416 static bool trans_USAT(DisasContext *s, arg_sat *a)
9418 return op_sat(s, a, gen_helper_usat);
9421 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
9423 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9424 return false;
9426 return op_sat(s, a, gen_helper_ssat16);
9429 static bool trans_USAT16(DisasContext *s, arg_sat *a)
9431 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9432 return false;
9434 return op_sat(s, a, gen_helper_usat16);
9437 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
9438 void (*gen_extract)(TCGv_i32, TCGv_i32),
9439 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9441 TCGv_i32 tmp;
9443 if (!ENABLE_ARCH_6) {
9444 return false;
9447 tmp = load_reg(s, a->rm);
9449 * TODO: In many cases we could do a shift instead of a rotate.
9450 * Combined with a simple extend, that becomes an extract.
9452 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
9453 gen_extract(tmp, tmp);
9455 if (a->rn != 15) {
9456 TCGv_i32 tmp2 = load_reg(s, a->rn);
9457 gen_add(tmp, tmp, tmp2);
9458 tcg_temp_free_i32(tmp2);
9460 store_reg(s, a->rd, tmp);
9461 return true;
9464 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
9466 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9469 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
9471 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9474 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
9476 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9477 return false;
9479 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9482 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
9484 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9487 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
9489 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9492 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
9494 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9495 return false;
9497 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9500 static bool trans_SEL(DisasContext *s, arg_rrr *a)
9502 TCGv_i32 t1, t2, t3;
9504 if (s->thumb
9505 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9506 : !ENABLE_ARCH_6) {
9507 return false;
9510 t1 = load_reg(s, a->rn);
9511 t2 = load_reg(s, a->rm);
9512 t3 = tcg_temp_new_i32();
9513 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9514 gen_helper_sel_flags(t1, t3, t1, t2);
9515 tcg_temp_free_i32(t3);
9516 tcg_temp_free_i32(t2);
9517 store_reg(s, a->rd, t1);
9518 return true;
9521 static bool op_rr(DisasContext *s, arg_rr *a,
9522 void (*gen)(TCGv_i32, TCGv_i32))
9524 TCGv_i32 tmp;
9526 tmp = load_reg(s, a->rm);
9527 gen(tmp, tmp);
9528 store_reg(s, a->rd, tmp);
9529 return true;
9532 static bool trans_REV(DisasContext *s, arg_rr *a)
9534 if (!ENABLE_ARCH_6) {
9535 return false;
9537 return op_rr(s, a, tcg_gen_bswap32_i32);
9540 static bool trans_REV16(DisasContext *s, arg_rr *a)
9542 if (!ENABLE_ARCH_6) {
9543 return false;
9545 return op_rr(s, a, gen_rev16);
9548 static bool trans_REVSH(DisasContext *s, arg_rr *a)
9550 if (!ENABLE_ARCH_6) {
9551 return false;
9553 return op_rr(s, a, gen_revsh);
9556 static bool trans_RBIT(DisasContext *s, arg_rr *a)
9558 if (!ENABLE_ARCH_6T2) {
9559 return false;
9561 return op_rr(s, a, gen_helper_rbit);
9565 * Signed multiply, signed and unsigned divide
9568 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9570 TCGv_i32 t1, t2;
9572 if (!ENABLE_ARCH_6) {
9573 return false;
9576 t1 = load_reg(s, a->rn);
9577 t2 = load_reg(s, a->rm);
9578 if (m_swap) {
9579 gen_swap_half(t2);
9581 gen_smul_dual(t1, t2);
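/*
 * t1 now holds the signed product of the low halves and t2 that of the
 * high halves; the X variants swapped Rm's halves above, turning these
 * into the cross products.
 */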
9583 if (sub) {
9584 /* This subtraction cannot overflow. */
9585 tcg_gen_sub_i32(t1, t1, t2);
9586 } else {
9588 * This addition cannot overflow 32 bits; however it may
9589 * overflow when considered as a signed operation, in which case
9590 * we must set the Q flag.
9592 gen_helper_add_setq(t1, cpu_env, t1, t2);
9594 tcg_temp_free_i32(t2);
9596 if (a->ra != 15) {
9597 t2 = load_reg(s, a->ra);
9598 gen_helper_add_setq(t1, cpu_env, t1, t2);
9599 tcg_temp_free_i32(t2);
9601 store_reg(s, a->rd, t1);
9602 return true;
9605 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9607 return op_smlad(s, a, false, false);
9610 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9612 return op_smlad(s, a, true, false);
9615 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9617 return op_smlad(s, a, false, true);
9620 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9622 return op_smlad(s, a, true, true);
9625 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9627 TCGv_i32 t1, t2;
9628 TCGv_i64 l1, l2;
9630 if (!ENABLE_ARCH_6) {
9631 return false;
9634 t1 = load_reg(s, a->rn);
9635 t2 = load_reg(s, a->rm);
9636 if (m_swap) {
9637 gen_swap_half(t2);
9639 gen_smul_dual(t1, t2);
9641 l1 = tcg_temp_new_i64();
9642 l2 = tcg_temp_new_i64();
9643 tcg_gen_ext_i32_i64(l1, t1);
9644 tcg_gen_ext_i32_i64(l2, t2);
9645 tcg_temp_free_i32(t1);
9646 tcg_temp_free_i32(t2);
9648 if (sub) {
9649 tcg_gen_sub_i64(l1, l1, l2);
9650 } else {
9651 tcg_gen_add_i64(l1, l1, l2);
9653 tcg_temp_free_i64(l2);
9655 gen_addq(s, l1, a->ra, a->rd);
9656 gen_storeq_reg(s, a->ra, a->rd, l1);
9657 tcg_temp_free_i64(l1);
9658 return true;
9661 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9663 return op_smlald(s, a, false, false);
9666 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9668 return op_smlald(s, a, true, false);
9671 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9673 return op_smlald(s, a, false, true);
9676 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9678 return op_smlald(s, a, true, true);
9681 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9683 TCGv_i32 t1, t2;
9685 if (s->thumb
9686 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9687 : !ENABLE_ARCH_6) {
9688 return false;
9691 t1 = load_reg(s, a->rn);
9692 t2 = load_reg(s, a->rm);
9693 tcg_gen_muls2_i32(t2, t1, t1, t2);
9695 if (a->ra != 15) {
9696 TCGv_i32 t3 = load_reg(s, a->ra);
9697 if (sub) {
9699 * For SMMLS, we need a 64-bit subtract: the borrow is caused
9700 * by a non-zero multiplicand lowpart, and we also need the
9701 * correct result lowpart for rounding.
9703 TCGv_i32 zero = tcg_const_i32(0);
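/*
 * Compute {t1:t2} = (Ra << 32) - {t1:t2}: t1 then holds the wanted high
 * word and t2 the low word used for rounding below.
 */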
9704 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9705 tcg_temp_free_i32(zero);
9706 } else {
9707 tcg_gen_add_i32(t1, t1, t3);
9709 tcg_temp_free_i32(t3);
9711 if (round) {
9713 * Adding 0x80000000 to the 64-bit quantity means that we have
9714 * carry in to the high word when the low word has the msb set.
9716 tcg_gen_shri_i32(t2, t2, 31);
9717 tcg_gen_add_i32(t1, t1, t2);
9719 tcg_temp_free_i32(t2);
9720 store_reg(s, a->rd, t1);
9721 return true;
9724 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9726 return op_smmla(s, a, false, false);
9729 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9731 return op_smmla(s, a, true, false);
9734 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9736 return op_smmla(s, a, false, true);
9739 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9741 return op_smmla(s, a, true, true);
9744 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9746 TCGv_i32 t1, t2;
9748 if (s->thumb
9749 ? !dc_isar_feature(aa32_thumb_div, s)
9750 : !dc_isar_feature(aa32_arm_div, s)) {
9751 return false;
9754 t1 = load_reg(s, a->rn);
9755 t2 = load_reg(s, a->rm);
9756 if (u) {
9757 gen_helper_udiv(t1, t1, t2);
9758 } else {
9759 gen_helper_sdiv(t1, t1, t2);
9761 tcg_temp_free_i32(t2);
9762 store_reg(s, a->rd, t1);
9763 return true;
9766 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
9768 return op_div(s, a, false);
9771 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
9773 return op_div(s, a, true);
9777 * Block data transfer
9780 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
9782 TCGv_i32 addr = load_reg(s, a->rn);
9784 if (a->b) {
9785 if (a->i) {
9786 /* pre increment */
9787 tcg_gen_addi_i32(addr, addr, 4);
9788 } else {
9789 /* pre decrement */
9790 tcg_gen_addi_i32(addr, addr, -(n * 4));
9792 } else if (!a->i && n != 1) {
9793 /* post decrement */
9794 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9797 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
9799 * If the writeback is incrementing SP rather than
9800 * decrementing it, and the initial SP is below the
9801 * stack limit but the final written-back SP would
9802 * be above, then we must not perform any memory
9803 * accesses, but it is IMPDEF whether we generate
9804 * an exception. We choose to do so in this case.
9805 * At this point 'addr' is the lowest address: either
9806 * the original SP (if incrementing) or our final SP
9807 * (if decrementing), so that's what we check.
9809 gen_helper_v8m_stackcheck(cpu_env, addr);
9812 return addr;
9815 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
9816 TCGv_i32 addr, int n)
9818 if (a->w) {
9819 /* write back */
9820 if (!a->b) {
9821 if (a->i) {
9822 /* post increment */
9823 tcg_gen_addi_i32(addr, addr, 4);
9824 } else {
9825 /* post decrement */
9826 tcg_gen_addi_i32(addr, addr, -(n * 4));
9828 } else if (!a->i && n != 1) {
9829 /* pre decrement */
9830 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9832 store_reg(s, a->rn, addr);
9833 } else {
9834 tcg_temp_free_i32(addr);
9838 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
9840 int i, j, n, list, mem_idx;
9841 bool user = a->u;
9842 TCGv_i32 addr, tmp, tmp2;
9844 if (user) {
9845 /* STM (user) */
9846 if (IS_USER(s)) {
9847 /* Only usable in supervisor mode. */
9848 unallocated_encoding(s);
9849 return true;
9853 list = a->list;
9854 n = ctpop16(list);
9855 if (n < min_n || a->rn == 15) {
9856 unallocated_encoding(s);
9857 return true;
9860 addr = op_addr_block_pre(s, a, n);
9861 mem_idx = get_mem_index(s);
9863 for (i = j = 0; i < 16; i++) {
9864 if (!(list & (1 << i))) {
9865 continue;
9868 if (user && i != 15) {
9869 tmp = tcg_temp_new_i32();
9870 tmp2 = tcg_const_i32(i);
9871 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
9872 tcg_temp_free_i32(tmp2);
9873 } else {
9874 tmp = load_reg(s, i);
9876 gen_aa32_st32(s, tmp, addr, mem_idx);
9877 tcg_temp_free_i32(tmp);
9879 /* No need to add after the last transfer. */
9880 if (++j != n) {
9881 tcg_gen_addi_i32(addr, addr, 4);
9885 op_addr_block_post(s, a, addr, n);
9886 return true;
9889 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
9891 /* BitCount(list) < 1 is UNPREDICTABLE */
9892 return op_stm(s, a, 1);
9895 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
9897 /* Writeback register in register list is UNPREDICTABLE for T32. */
9898 if (a->w && (a->list & (1 << a->rn))) {
9899 unallocated_encoding(s);
9900 return true;
9902 /* BitCount(list) < 2 is UNPREDICTABLE */
9903 return op_stm(s, a, 2);
9906 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
9908 int i, j, n, list, mem_idx;
9909 bool loaded_base;
9910 bool user = a->u;
9911 bool exc_return = false;
9912 TCGv_i32 addr, tmp, tmp2, loaded_var;
9914 if (user) {
9915 /* LDM (user), LDM (exception return) */
9916 if (IS_USER(s)) {
9917 /* Only usable in supervisor mode. */
9918 unallocated_encoding(s);
9919 return true;
9921 if (extract32(a->list, 15, 1)) {
9922 exc_return = true;
9923 user = false;
9924 } else {
9925 /* LDM (user) does not allow writeback. */
9926 if (a->w) {
9927 unallocated_encoding(s);
9928 return true;
9933 list = a->list;
9934 n = ctpop16(list);
9935 if (n < min_n || a->rn == 15) {
9936 unallocated_encoding(s);
9937 return true;
9940 addr = op_addr_block_pre(s, a, n);
9941 mem_idx = get_mem_index(s);
9942 loaded_base = false;
9943 loaded_var = NULL;
9945 for (i = j = 0; i < 16; i++) {
9946 if (!(list & (1 << i))) {
9947 continue;
9950 tmp = tcg_temp_new_i32();
9951 gen_aa32_ld32u(s, tmp, addr, mem_idx);
9952 if (user) {
9953 tmp2 = tcg_const_i32(i);
9954 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
9955 tcg_temp_free_i32(tmp2);
9956 tcg_temp_free_i32(tmp);
9957 } else if (i == a->rn) {
9958 loaded_var = tmp;
9959 loaded_base = true;
9960 } else if (i == 15 && exc_return) {
9961 store_pc_exc_ret(s, tmp);
9962 } else {
9963 store_reg_from_load(s, i, tmp);
9966 /* No need to add after the last transfer. */
9967 if (++j != n) {
9968 tcg_gen_addi_i32(addr, addr, 4);
9972 op_addr_block_post(s, a, addr, n);
9974 if (loaded_base) {
9975 /* Note that we reject base == pc above. */
9976 store_reg(s, a->rn, loaded_var);
9979 if (exc_return) {
9980 /* Restore CPSR from SPSR. */
9981 tmp = load_cpu_field(spsr);
9982 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9983 gen_io_start();
9985 gen_helper_cpsr_write_eret(cpu_env, tmp);
9986 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9987 gen_io_end();
9989 tcg_temp_free_i32(tmp);
9990 /* Must exit loop to check un-masked IRQs */
9991 s->base.is_jmp = DISAS_EXIT;
9993 return true;
9996 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
9999 * Writeback register in register list is UNPREDICTABLE
10000 * for ArchVersion() >= 7. Prior to v7, A32 would write
10001 * an UNKNOWN value to the base register.
10003 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
10004 unallocated_encoding(s);
10005 return true;
10007 /* BitCount(list) < 1 is UNPREDICTABLE */
10008 return do_ldm(s, a, 1);
10011 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
10013 /* Writeback register in register list is UNPREDICTABLE for T32. */
10014 if (a->w && (a->list & (1 << a->rn))) {
10015 unallocated_encoding(s);
10016 return true;
10018 /* BitCount(list) < 2 is UNPREDICTABLE */
10019 return do_ldm(s, a, 2);
10022 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
10024 /* Writeback is conditional on the base register not being loaded. */
10025 a->w = !(a->list & (1 << a->rn));
10026 /* BitCount(list) < 1 is UNPREDICTABLE */
10027 return do_ldm(s, a, 1);
10031 * Branch, branch with link
10034 static bool trans_B(DisasContext *s, arg_i *a)
10036 gen_jmp(s, read_pc(s) + a->imm);
10037 return true;
10040 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
10042 /* This has its cond from the encoding, and must be outside an IT block. */
10043 if (a->cond >= 0xe) {
10044 return false;
10046 if (s->condexec_mask) {
10047 unallocated_encoding(s);
10048 return true;
10050 arm_skip_unless(s, a->cond);
10051 gen_jmp(s, read_pc(s) + a->imm);
10052 return true;
10055 static bool trans_BL(DisasContext *s, arg_i *a)
10057 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
10058 gen_jmp(s, read_pc(s) + a->imm);
10059 return true;
10062 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
10064 TCGv_i32 tmp;
10066 /* For A32, ARCH(5) is checked near the start of the uncond block. */
10067 if (s->thumb && (a->imm & 2)) {
10068 return false;
10070 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
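/*
 * BLX <imm> always switches instruction set: flip the Thumb flag and
 * compute the branch target from the word-aligned PC.
 */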
10071 tmp = tcg_const_i32(!s->thumb);
10072 store_cpu_field(tmp, thumb);
10073 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
10074 return true;
10077 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
10079 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10080 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
10081 return true;
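/*
 * Thumb-1 splits BL/BLX into two 16-bit halves: the prefix above parks
 * PC + (imm << 12) in LR; the suffixes below add their own (imm << 1) to
 * LR to form the target, replace LR with the real return address, and
 * branch via gen_bx().
 */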
10084 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
10086 TCGv_i32 tmp = tcg_temp_new_i32();
10088 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10089 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
10090 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10091 gen_bx(s, tmp);
10092 return true;
10095 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
10097 TCGv_i32 tmp;
10099 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10100 if (!ENABLE_ARCH_5) {
10101 return false;
10103 tmp = tcg_temp_new_i32();
10104 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
10105 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
10106 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
10107 gen_bx(s, tmp);
10108 return true;
10111 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
10113 TCGv_i32 addr, tmp;
10115 tmp = load_reg(s, a->rm);
10116 if (half) {
10117 tcg_gen_add_i32(tmp, tmp, tmp);
10119 addr = load_reg(s, a->rn);
10120 tcg_gen_add_i32(addr, addr, tmp);
10122 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10123 half ? MO_UW | s->be_data : MO_UB);
10124 tcg_temp_free_i32(addr);
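/*
 * The loaded table entry counts halfwords: double it and add the PC to
 * form the branch target (for TBH the index itself was scaled by 2 above).
 */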
10126 tcg_gen_add_i32(tmp, tmp, tmp);
10127 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10128 store_reg(s, 15, tmp);
10129 return true;
10132 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
10134 return op_tbranch(s, a, false);
10137 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
10139 return op_tbranch(s, a, true);
10142 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
10144 TCGv_i32 tmp = load_reg(s, a->rn);
10146 arm_gen_condlabel(s);
10147 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10148 tmp, 0, s->condlabel);
10149 tcg_temp_free_i32(tmp);
10150 gen_jmp(s, read_pc(s) + a->imm);
10151 return true;
10155 * Supervisor call - both T32 & A32 come here so we need to check
10156 * which mode we are in when checking for semihosting.
10159 static bool trans_SVC(DisasContext *s, arg_SVC *a)
10161 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
10163 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
10164 #ifndef CONFIG_USER_ONLY
10165 !IS_USER(s) &&
10166 #endif
10167 (a->imm == semihost_imm)) {
10168 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
10169 } else {
10170 gen_set_pc_im(s, s->base.pc_next);
10171 s->svc_imm = a->imm;
10172 s->base.is_jmp = DISAS_SWI;
10174 return true;
10178 * Unconditional system instructions
10181 static bool trans_RFE(DisasContext *s, arg_RFE *a)
10183 static const int8_t pre_offset[4] = {
10184 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
10186 static const int8_t post_offset[4] = {
10187 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
10189 TCGv_i32 addr, t1, t2;
10191 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10192 return false;
10194 if (IS_USER(s)) {
10195 unallocated_encoding(s);
10196 return true;
10199 addr = load_reg(s, a->rn);
10200 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10202 /* Load PC into t1 and CPSR into t2. */
10203 t1 = tcg_temp_new_i32();
10204 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10205 tcg_gen_addi_i32(addr, addr, 4);
10206 t2 = tcg_temp_new_i32();
10207 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10209 if (a->w) {
10210 /* Base writeback. */
10211 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10212 store_reg(s, a->rn, addr);
10213 } else {
10214 tcg_temp_free_i32(addr);
10216 gen_rfe(s, t1, t2);
10217 return true;
10220 static bool trans_SRS(DisasContext *s, arg_SRS *a)
10222 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10223 return false;
10225 gen_srs(s, a->mode, a->pu, a->w);
10226 return true;
10229 static bool trans_CPS(DisasContext *s, arg_CPS *a)
10231 uint32_t mask, val;
10233 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
10234 return false;
10236 if (IS_USER(s)) {
10237 /* Implemented as NOP in user mode. */
10238 return true;
10240 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
10242 mask = val = 0;
10243 if (a->imod & 2) {
10244 if (a->A) {
10245 mask |= CPSR_A;
10247 if (a->I) {
10248 mask |= CPSR_I;
10250 if (a->F) {
10251 mask |= CPSR_F;
10253 if (a->imod & 1) {
10254 val |= mask;
10257 if (a->M) {
10258 mask |= CPSR_M;
10259 val |= a->mode;
10261 if (mask) {
10262 gen_set_psr_im(s, mask, 0, val);
10264 return true;
10267 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
10269 TCGv_i32 tmp, addr, el;
10271 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10272 return false;
10274 if (IS_USER(s)) {
10275 /* Implemented as NOP in user mode. */
10276 return true;
10279 tmp = tcg_const_i32(a->im);
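/*
 * a->im is the one-bit value being written; 19 and 16 below are the SYSm
 * register numbers for FAULTMASK and PRIMASK expected by gen_helper_v7m_msr().
 */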
10280 /* FAULTMASK */
10281 if (a->F) {
10282 addr = tcg_const_i32(19);
10283 gen_helper_v7m_msr(cpu_env, addr, tmp);
10284 tcg_temp_free_i32(addr);
10286 /* PRIMASK */
10287 if (a->I) {
10288 addr = tcg_const_i32(16);
10289 gen_helper_v7m_msr(cpu_env, addr, tmp);
10290 tcg_temp_free_i32(addr);
10292 el = tcg_const_i32(s->current_el);
10293 gen_helper_rebuild_hflags_m32(cpu_env, el);
10294 tcg_temp_free_i32(el);
10295 tcg_temp_free_i32(tmp);
10296 gen_lookup_tb(s);
10297 return true;
10301 * Clear-Exclusive, Barriers
10304 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
10306 if (s->thumb
10307 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10308 : !ENABLE_ARCH_6K) {
10309 return false;
10311 gen_clrex(s);
10312 return true;
10315 static bool trans_DSB(DisasContext *s, arg_DSB *a)
10317 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10318 return false;
10320 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10321 return true;
10324 static bool trans_DMB(DisasContext *s, arg_DMB *a)
10326 return trans_DSB(s, NULL);
10329 static bool trans_ISB(DisasContext *s, arg_ISB *a)
10331 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10332 return false;
10335 * We need to break the TB after this insn to execute
10336 * self-modifying code correctly and also to take
10337 * any pending interrupts immediately.
10339 gen_goto_tb(s, 0, s->base.pc_next);
10340 return true;
10343 static bool trans_SB(DisasContext *s, arg_SB *a)
10345 if (!dc_isar_feature(aa32_sb, s)) {
10346 return false;
10349 * TODO: There is no speculation barrier opcode
10350 * for TCG; MB and end the TB instead.
10352 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
10353 gen_goto_tb(s, 0, s->base.pc_next);
10354 return true;
10357 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
10359 if (!ENABLE_ARCH_6) {
10360 return false;
10362 if (a->E != (s->be_data == MO_BE)) {
10363 gen_helper_setend(cpu_env);
10364 s->base.is_jmp = DISAS_UPDATE;
10366 return true;
10370 * Preload instructions
10371 * All are nops, contingent on the appropriate arch level.
10374 static bool trans_PLD(DisasContext *s, arg_PLD *a)
10376 return ENABLE_ARCH_5TE;
10379 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
10381 return arm_dc_feature(s, ARM_FEATURE_V7MP);
10384 static bool trans_PLI(DisasContext *s, arg_PLD *a)
10386 return ENABLE_ARCH_7;
10390 * If-then
10393 static bool trans_IT(DisasContext *s, arg_IT *a)
10395 int cond_mask = a->cond_mask;
10398 * No actual code generated for this insn, just setup state.
10400 * Combinations of firstcond and mask which set up a 0b1111
10401 * condition are UNPREDICTABLE; we take the CONSTRAINED
10402 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10403 * i.e. both meaning "execute always".
10405 s->condexec_cond = (cond_mask >> 4) & 0xe;
10406 s->condexec_mask = cond_mask & 0x1f;
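/*
 * Only the top three bits of firstcond are kept in condexec_cond; the low
 * bit travels in condexec_mask and is folded back in for each insn as the
 * block advances, which is how the T/E pattern flips the condition.
 */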
10407 return true;
10411 * Legacy decoder.
10414 static void disas_arm_insn(DisasContext *s, unsigned int insn)
10416 unsigned int cond = insn >> 28;
10418 /* M variants do not implement ARM mode; this must raise the INVSTATE
10419 * UsageFault exception.
10421 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10422 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10423 default_exception_el(s));
10424 return;
10427 if (cond == 0xf) {
10428 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
10429 * choose to UNDEF. In ARMv5 and above the space is used
10430 * for miscellaneous unconditional instructions.
10432 ARCH(5);
10434 /* Unconditional instructions. */
10435 /* TODO: Perhaps merge these into one decodetree output file. */
10436 if (disas_a32_uncond(s, insn) ||
10437 disas_vfp_uncond(s, insn) ||
10438 disas_neon_dp(s, insn) ||
10439 disas_neon_ls(s, insn) ||
10440 disas_neon_shared(s, insn)) {
10441 return;
10443 /* fall back to legacy decoder */
10445 if (((insn >> 25) & 7) == 1) {
10446 /* NEON Data processing. */
10447 if (disas_neon_data_insn(s, insn)) {
10448 goto illegal_op;
10450 return;
10452 if ((insn & 0x0e000f00) == 0x0c000100) {
10453 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10454 /* iWMMXt register transfer. */
10455 if (extract32(s->c15_cpar, 1, 1)) {
10456 if (!disas_iwmmxt_insn(s, insn)) {
10457 return;
10462 goto illegal_op;
10464 if (cond != 0xe) {
10465 /* If not always-execute, generate a conditional jump to the
10466 next instruction. */
10467 arm_skip_unless(s, cond);
10470 /* TODO: Perhaps merge these into one decodetree output file. */
10471 if (disas_a32(s, insn) ||
10472 disas_vfp(s, insn)) {
10473 return;
10475 /* fall back to legacy decoder */
10477 switch ((insn >> 24) & 0xf) {
10478 case 0xc:
10479 case 0xd:
10480 case 0xe:
10481 if (((insn >> 8) & 0xe) == 10) {
10482 /* VFP, but failed disas_vfp. */
10483 goto illegal_op;
10485 if (disas_coproc_insn(s, insn)) {
10486 /* Coprocessor. */
10487 goto illegal_op;
10489 break;
10490 default:
10491 illegal_op:
10492 unallocated_encoding(s);
10493 break;
10497 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10500 * Return true if this is a 16 bit instruction. We must be precise
10501 * about this (matching the decode).
10503 if ((insn >> 11) < 0x1d) {
10504 /* Definitely a 16-bit instruction */
10505 return true;
10508 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
10509 * first half of a 32-bit Thumb insn. Thumb-1 cores might
10510 * end up actually treating this as two 16-bit insns, though,
10511 * if it's half of a bl/blx pair that might span a page boundary.
10513 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
10514 arm_dc_feature(s, ARM_FEATURE_M)) {
10515 /* Thumb2 cores (including all M profile ones) always treat
10516 * 32-bit insns as 32-bit.
10518 return false;
10521 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
10522 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
10523 * is not on the next page; we merge this into a 32-bit
10524 * insn.
10526 return false;
10528 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
10529 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
10530 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10531 * -- handle as single 16 bit insn
10533 return true;
10536 /* Translate a 32-bit thumb instruction. */
10537 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10540 * ARMv6-M supports a limited subset of Thumb2 instructions.
10541 * Other Thumb1 architectures allow only 32-bit
10542 * combined BL/BLX prefix and suffix.
10544 if (arm_dc_feature(s, ARM_FEATURE_M) &&
10545 !arm_dc_feature(s, ARM_FEATURE_V7)) {
10546 int i;
10547 bool found = false;
10548 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
10549 0xf3b08040 /* dsb */,
10550 0xf3b08050 /* dmb */,
10551 0xf3b08060 /* isb */,
10552 0xf3e08000 /* mrs */,
10553 0xf000d000 /* bl */};
10554 static const uint32_t armv6m_mask[] = {0xffe0d000,
10555 0xfff0d0f0,
10556 0xfff0d0f0,
10557 0xfff0d0f0,
10558 0xffe0d000,
10559 0xf800d000};
10561 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
10562 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10563 found = true;
10564 break;
10567 if (!found) {
10568 goto illegal_op;
10570 } else if ((insn & 0xf800e800) != 0xf000e800) {
10571 ARCH(6T2);
10574 if ((insn & 0xef000000) == 0xef000000) {
10576 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10577 * transform into
10578 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
10580 uint32_t a32_insn = (insn & 0xe2ffffff) |
10581 ((insn & (1 << 28)) >> 4) | (1 << 28);
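/*
 * i.e. move the T32 'p' bit from bit 28 down to bit 24 and force the top
 * nibble to 0b1111, giving the equivalent unconditional A32 encoding.
 */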
10583 if (disas_neon_dp(s, a32_insn)) {
10584 return;
10588 if ((insn & 0xff100000) == 0xf9000000) {
10590 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10591 * transform into
10592 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
10594 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
10596 if (disas_neon_ls(s, a32_insn)) {
10597 return;
10602 * TODO: Perhaps merge these into one decodetree output file.
10603 * Note disas_vfp is written for a32 with cond field in the
10604 * top nibble. The t32 encoding requires 0xe in the top nibble.
10606 if (disas_t32(s, insn) ||
10607 disas_vfp_uncond(s, insn) ||
10608 disas_neon_shared(s, insn) ||
10609 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10610 return;
10612 /* fall back to legacy decoder */
10614 switch ((insn >> 25) & 0xf) {
10615 case 0: case 1: case 2: case 3:
10616 /* 16-bit instructions. Should never happen. */
10617 abort();
10618 case 6: case 7: case 14: case 15:
10619 /* Coprocessor. */
10620 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10621 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10622 if (extract32(insn, 24, 2) == 3) {
10623 goto illegal_op; /* op0 = 0b11 : unallocated */
10626 if (((insn >> 8) & 0xe) == 10 &&
10627 dc_isar_feature(aa32_fpsp_v2, s)) {
10628 /* FP, and the CPU supports it */
10629 goto illegal_op;
10630 } else {
10631 /* All other insns: NOCP */
10632 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
10633 syn_uncategorized(),
10634 default_exception_el(s));
10636 break;
10638 if (((insn >> 24) & 3) == 3) {
10639 /* Translate into the equivalent ARM encoding. */
10640 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10641 if (disas_neon_data_insn(s, insn)) {
10642 goto illegal_op;
10644 } else if (((insn >> 8) & 0xe) == 10) {
10645 /* VFP, but failed disas_vfp. */
10646 goto illegal_op;
10647 } else {
10648 if (insn & (1 << 28))
10649 goto illegal_op;
10650 if (disas_coproc_insn(s, insn)) {
10651 goto illegal_op;
10654 break;
10655 case 12:
10656 goto illegal_op;
10657 default:
10658 illegal_op:
10659 unallocated_encoding(s);
10663 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10665 if (!disas_t16(s, insn)) {
10666 unallocated_encoding(s);
10670 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10672 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10673 * (False positives are OK, false negatives are not.)
10674 * We know this is a Thumb insn, and our caller ensures we are
10675 * only called if dc->base.pc_next is less than 4 bytes from the page
10676 * boundary, so we cross the page if the first 16 bits indicate
10677 * that this is a 32 bit insn.
10679 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10681 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10684 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
10686 DisasContext *dc = container_of(dcbase, DisasContext, base);
10687 CPUARMState *env = cs->env_ptr;
10688 ARMCPU *cpu = env_archcpu(env);
10689 uint32_t tb_flags = dc->base.tb->flags;
10690 uint32_t condexec, core_mmu_idx;
10692 dc->isar = &cpu->isar;
10693 dc->condjmp = 0;
10695 dc->aarch64 = 0;
10696 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10697 * there is no secure EL1, so we route exceptions to EL3.
10699 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10700 !arm_el_is_aa64(env, 3);
10701 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
10702 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10703 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
10704 dc->condexec_mask = (condexec & 0xf) << 1;
10705 dc->condexec_cond = condexec >> 4;
10707 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10708 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10709 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10710 #if !defined(CONFIG_USER_ONLY)
10711 dc->user = (dc->current_el == 0);
10712 #endif
10713 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10715 if (arm_feature(env, ARM_FEATURE_M)) {
10716 dc->vfp_enabled = 1;
10717 dc->be_data = MO_TE;
10718 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
10719 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10720 regime_is_secure(env, dc->mmu_idx);
10721 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
10722 dc->v8m_fpccr_s_wrong =
10723 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
10724 dc->v7m_new_fp_ctxt_needed =
10725 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
10726 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
10727 } else {
10728 dc->be_data =
10729 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10730 dc->debug_target_el =
10731 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10732 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10733 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
10734 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10735 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10736 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10737 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10738 } else {
10739 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10740 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10743 dc->cp_regs = cpu->cp_regs;
10744 dc->features = env->features;
10746 /* Single step state. The code-generation logic here is:
10747 * SS_ACTIVE == 0:
10748 * generate code with no special handling for single-stepping (except
10749 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10750 * this happens anyway because those changes are all system register or
10751 * PSTATE writes).
10752 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10753 * emit code for one insn
10754 * emit code to clear PSTATE.SS
10755 * emit code to generate software step exception for completed step
10756 * end TB (as usual for having generated an exception)
10757 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10758 * emit code to generate a software step exception
10759 * end the TB
10761 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
10762 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
10763 dc->is_ldex = false;
10765 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
10767 /* If architectural single step active, limit to 1. */
10768 if (is_singlestepping(dc)) {
10769 dc->base.max_insns = 1;
10772 /* ARM is a fixed-length ISA. Bound the number of insns to execute
10773 to those left on the page. */
10774 if (!dc->thumb) {
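/*
 * -(pc | TARGET_PAGE_MASK) is the number of bytes left on the current
 * page (TARGET_PAGE_MASK has all of the high bits set), so dividing by
 * 4 gives the number of remaining 4-byte A32 insn slots.  For example,
 * assuming 4 KiB pages, a pc_first at page offset 0xff8 gives a bound
 * of (0x1000 - 0xff8) / 4 = 2.
 */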
10775 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10776 dc->base.max_insns = MIN(dc->base.max_insns, bound);
10779 cpu_V0 = tcg_temp_new_i64();
10780 cpu_V1 = tcg_temp_new_i64();
10781 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
10782 cpu_M0 = tcg_temp_new_i64();
10785 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
10787 DisasContext *dc = container_of(dcbase, DisasContext, base);
10789 /* A note on handling of the condexec (IT) bits:
10791 * We want to avoid the overhead of having to write the updated condexec
10792 * bits back to the CPUARMState for every instruction in an IT block. So:
10793 * (1) if the condexec bits are not already zero then we write
10794 * zero back into the CPUARMState now. This avoids complications trying
10795 * to do it at the end of the block. (For example if we don't do this
10796 * it's hard to identify whether we can safely skip writing condexec
10797 * at the end of the TB, which we definitely want to do for the case
10798 * where a TB doesn't do anything with the IT state at all.)
10799 * (2) if we are going to leave the TB then we call gen_set_condexec()
10800 * which will write the correct value into CPUARMState if zero is wrong.
10801 * This is done both for leaving the TB at the end, and for leaving
10802 * it because of an exception we know will happen, which is done in
10803 * gen_exception_insn(). The latter is necessary because we need to
10804 * leave the TB with the PC/IT state just prior to execution of the
10805 * instruction which caused the exception.
10806 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
10807 * then the CPUARMState will be wrong and we need to reset it.
10808 * This is handled in the same way as restoration of the
10809 * PC in these situations; we save the value of the condexec bits
10810 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
10811 * then uses this to restore them after an exception.
10813 * Note that there are no instructions which can read the condexec
10814 * bits, and none which can write non-static values to them, so
10815 * we don't need to care about whether CPUARMState is correct in the
10816 * middle of a TB.
10819 /* Reset the conditional execution bits immediately. This avoids
10820 complications trying to do it at the end of the block. */
10821 if (dc->condexec_mask || dc->condexec_cond) {
10822 TCGv_i32 tmp = tcg_temp_new_i32();
10823 tcg_gen_movi_i32(tmp, 0);
10824 store_cpu_field(tmp, condexec_bits);
10828 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10830 DisasContext *dc = container_of(dcbase, DisasContext, base);
10832 tcg_gen_insn_start(dc->base.pc_next,
10833 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
10835 dc->insn_start = tcg_last_op();
10838 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
10839 const CPUBreakpoint *bp)
10841 DisasContext *dc = container_of(dcbase, DisasContext, base);
10843 if (bp->flags & BP_CPU) {
10844 gen_set_condexec(dc);
10845 gen_set_pc_im(dc, dc->base.pc_next);
10846 gen_helper_check_breakpoints(cpu_env);
10847 /* End the TB early; it's likely not going to be executed */
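/* CPU breakpoints (unlike gdbstub ones) can depend on run-time state
 * such as the current EL or security state, so we re-check them via
 * the helper at execution time; if none actually fires, execution
 * simply falls through to the insn at this PC in a new TB.
 */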
10848 dc->base.is_jmp = DISAS_TOO_MANY;
10849 } else {
10850 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
10851 /* The address covered by the breakpoint must be
10852 included in [tb->pc, tb->pc + tb->size) in order
10853 for it to be properly cleared -- thus we
10854 increment the PC here so that the logic setting
10855 tb->size below does the right thing. */
10856 /* TODO: Advance PC by correct instruction length to
10857 * avoid disassembler error messages */
10858 dc->base.pc_next += 2;
10859 dc->base.is_jmp = DISAS_NORETURN;
10862 return true;
10865 static bool arm_pre_translate_insn(DisasContext *dc)
10867 #ifdef CONFIG_USER_ONLY
10868 /* Intercept jump to the magic kernel page. */
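/* (0xffff0000 upwards is the Linux high-vectors/kuser-helper page; the
 * EXCP_KERNEL_TRAP raised here is how the linux-user front end gets to
 * emulate those helpers.)
 */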
10869 if (dc->base.pc_next >= 0xffff0000) {
10870 /* We always get here via a jump, so know we are not in a
10871 conditional execution block. */
10872 gen_exception_internal(EXCP_KERNEL_TRAP);
10873 dc->base.is_jmp = DISAS_NORETURN;
10874 return true;
10876 #endif
10878 if (dc->ss_active && !dc->pstate_ss) {
10879 /* Singlestep state is Active-pending.
10880 * If we're in this state at the start of a TB then either
10881 * a) we just took an exception to an EL which is being debugged
10882 * and this is the first insn in the exception handler
10883 * b) debug exceptions were masked and we just unmasked them
10884 * without changing EL (eg by clearing PSTATE.D)
10885 * In either case we're going to take a swstep exception in the
10886 * "did not step an insn" case, and so the syndrome ISV and EX
10887 * bits should be zero.
10889 assert(dc->base.num_insns == 1);
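/* The two zero arguments are the syndrome ISV and EX bits mentioned above. */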
10890 gen_swstep_exception(dc, 0, 0);
10891 dc->base.is_jmp = DISAS_NORETURN;
10892 return true;
10895 return false;
10898 static void arm_post_translate_insn(DisasContext *dc)
10900 if (dc->condjmp && !dc->base.is_jmp) {
10901 gen_set_label(dc->condlabel);
10902 dc->condjmp = 0;
10904 translator_loop_temp_check(&dc->base);
10907 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10909 DisasContext *dc = container_of(dcbase, DisasContext, base);
10910 CPUARMState *env = cpu->env_ptr;
10911 unsigned int insn;
10913 if (arm_pre_translate_insn(dc)) {
10914 return;
10917 dc->pc_curr = dc->base.pc_next;
10918 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
10919 dc->insn = insn;
10920 dc->base.pc_next += 4;
10921 disas_arm_insn(dc, insn);
10923 arm_post_translate_insn(dc);
10925 /* ARM is a fixed-length ISA. We performed the cross-page check
10926 in init_disas_context by adjusting max_insns. */
10929 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
10931 /* Return true if this Thumb insn is always unconditional,
10932 * even inside an IT block. This is true of only a very few
10933 * instructions: BKPT, HLT, and SG.
10935 * A larger class of instructions are UNPREDICTABLE if used
10936 * inside an IT block; we do not need to detect those here, because
10937 * what we do by default (perform the cc check and update the IT
10938 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
10939 * choice for those situations.
10941 * insn is either a 16-bit or a 32-bit instruction; the two are
10942 * distinguishable because for the 16-bit case the top 16 bits
10943 * are zeroes, and that isn't a valid 32-bit encoding.
10945 if ((insn & 0xffffff00) == 0xbe00) {
10946 /* BKPT */
10947 return true;
10950 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
10951 !arm_dc_feature(s, ARM_FEATURE_M)) {
10952 /* HLT: v8A only. This is unconditional even when it is going to
10953 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
10954 * For v7 cores this was a plain old undefined encoding and so
10955 * honours its cc check. (We might be using the encoding as
10956 * a semihosting trap, but we don't change the cc check behaviour
10957 * on that account, because a debugger connected to a real v7A
10958 * core and emulating semihosting traps by catching the UNDEF
10959 * exception would also only see cases where the cc check passed.
10960 * No guest code should be trying to do a HLT semihosting trap
10961 * in an IT block anyway.
10963 return true;
10966 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
10967 arm_dc_feature(s, ARM_FEATURE_M)) {
10968 /* SG: v8M only */
10969 return true;
10972 return false;
10975 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10977 DisasContext *dc = container_of(dcbase, DisasContext, base);
10978 CPUARMState *env = cpu->env_ptr;
10979 uint32_t insn;
10980 bool is_16bit;
10982 if (arm_pre_translate_insn(dc)) {
10983 return;
10986 dc->pc_curr = dc->base.pc_next;
10987 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
10988 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
10989 dc->base.pc_next += 2;
10990 if (!is_16bit) {
10991 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
10993 insn = insn << 16 | insn2;
10994 dc->base.pc_next += 2;
10996 dc->insn = insn;
10998 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
10999 uint32_t cond = dc->condexec_cond;
11002 * Conditionally skip the insn. Note that both 0xe and 0xf mean
11003 * "always"; 0xf is not "never".
11005 if (cond < 0x0e) {
11006 arm_skip_unless(dc, cond);
11010 if (is_16bit) {
11011 disas_thumb_insn(dc, insn);
11012 } else {
11013 disas_thumb2_insn(dc, insn);
11016 /* Advance the Thumb condexec condition. */
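/*
 * This mirrors the architectural ITAdvance(): the low bit of the
 * condition is replaced by the next bit of the mask and the mask is
 * shifted left; once the mask runs out the IT block is over and the
 * state is cleared.  For a single-insn IT block, for example, the
 * internal mask starts as 0b10000 and becomes zero after this one
 * advance.
 */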
11017 if (dc->condexec_mask) {
11018 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
11019 ((dc->condexec_mask >> 4) & 1));
11020 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
11021 if (dc->condexec_mask == 0) {
11022 dc->condexec_cond = 0;
11026 arm_post_translate_insn(dc);
11028 /* Thumb is a variable-length ISA. Stop translation when the next insn
11029 * will touch a new page. This ensures that prefetch aborts occur at
11030 * the right place.
11032 * We want to stop the TB if the next insn starts in a new page,
11033 * or if it spans between this page and the next. This means that
11034 * if we're looking at the last halfword in the page we need to
11035 * see if it's a 16-bit Thumb insn (which will fit in this TB)
11036 * or a 32-bit Thumb insn (which won't).
11037 * This is to avoid generating a silly TB with a single 16-bit insn
11038 * in it at the end of this page (which would execute correctly
11039 * but isn't very efficient).
11041 if (dc->base.is_jmp == DISAS_NEXT
11042 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11043 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
11044 && insn_crosses_page(env, dc)))) {
11045 dc->base.is_jmp = DISAS_TOO_MANY;
11049 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
11051 DisasContext *dc = container_of(dcbase, DisasContext, base);
11053 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11054 /* FIXME: This can theoretically happen with self-modifying code. */
11055 cpu_abort(cpu, "IO on conditional branch instruction");
11058 /* At this stage dc->condjmp will only be set when the skipped
11059 instruction was a conditional branch or trap, and the PC has
11060 already been written. */
11061 gen_set_condexec(dc);
11062 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11063 /* Exception return branches need some special case code at the
11064 * end of the TB, which is complex enough that it has to
11065 * handle the single-step vs not and the condition-failed
11066 * insn codepath itself.
11068 gen_bx_excret_final_code(dc);
11069 } else if (unlikely(is_singlestepping(dc))) {
11070 /* Unconditional and "condition passed" instruction codepath. */
11071 switch (dc->base.is_jmp) {
11072 case DISAS_SWI:
11073 gen_ss_advance(dc);
11074 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11075 default_exception_el(dc));
11076 break;
11077 case DISAS_HVC:
11078 gen_ss_advance(dc);
11079 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11080 break;
11081 case DISAS_SMC:
11082 gen_ss_advance(dc);
11083 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11084 break;
11085 case DISAS_NEXT:
11086 case DISAS_TOO_MANY:
11087 case DISAS_UPDATE:
11088 gen_set_pc_im(dc, dc->base.pc_next);
11089 /* fall through */
11090 default:
11091 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
11092 gen_singlestep_exception(dc);
11093 break;
11094 case DISAS_NORETURN:
11095 break;
11097 } else {
11098 /* While branches must always occur at the end of an IT block,
11099 there are a few other things that can cause us to terminate
11100 the TB in the middle of an IT block:
11101 - Exception generating instructions (bkpt, swi, undefined).
11102 - Page boundaries.
11103 - Hardware watchpoints.
11104 Hardware breakpoints have already been handled and skip this code.
11106 switch (dc->base.is_jmp) {
11107 case DISAS_NEXT:
11108 case DISAS_TOO_MANY:
11109 gen_goto_tb(dc, 1, dc->base.pc_next);
11110 break;
11111 case DISAS_JUMP:
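/* The PC has already been written, so chain to the next TB via a
 * TB-hash lookup (gen_goto_ptr() emits a lookup-and-goto-ptr op)
 * rather than returning to the main loop.
 */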
11112 gen_goto_ptr();
11113 break;
11114 case DISAS_UPDATE:
11115 gen_set_pc_im(dc, dc->base.pc_next);
11116 /* fall through */
11117 default:
11118 /* indicate that the hash table must be used to find the next TB */
11119 tcg_gen_exit_tb(NULL, 0);
11120 break;
11121 case DISAS_NORETURN:
11122 /* nothing more to generate */
11123 break;
11124 case DISAS_WFI:
11126 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11127 !(dc->insn & (1U << 31))) ? 2 : 4);
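/*
 * tmp is the insn length: a 16-bit Thumb insn leaves the top half of
 * dc->insn zero, so bit 31 clear means a 2-byte encoding and anything
 * else is 4 bytes.  The helper uses this, for instance, to wind the PC
 * back onto the WFI itself before raising a trap to a higher
 * exception level.
 */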
11129 gen_helper_wfi(cpu_env, tmp);
11130 tcg_temp_free_i32(tmp);
11131 /* The helper doesn't necessarily throw an exception, but we
11132 * must go back to the main loop to check for interrupts anyway.
11134 tcg_gen_exit_tb(NULL, 0);
11135 break;
11137 case DISAS_WFE:
11138 gen_helper_wfe(cpu_env);
11139 break;
11140 case DISAS_YIELD:
11141 gen_helper_yield(cpu_env);
11142 break;
11143 case DISAS_SWI:
11144 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11145 default_exception_el(dc));
11146 break;
11147 case DISAS_HVC:
11148 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
11149 break;
11150 case DISAS_SMC:
11151 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
11152 break;
11156 if (dc->condjmp) {
11157 /* "Condition failed" instruction codepath for the branch/trap insn */
11158 gen_set_label(dc->condlabel);
11159 gen_set_condexec(dc);
11160 if (unlikely(is_singlestepping(dc))) {
11161 gen_set_pc_im(dc, dc->base.pc_next);
11162 gen_singlestep_exception(dc);
11163 } else {
11164 gen_goto_tb(dc, 1, dc->base.pc_next);
11169 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
11171 DisasContext *dc = container_of(dcbase, DisasContext, base);
11173 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11174 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11177 static const TranslatorOps arm_translator_ops = {
11178 .init_disas_context = arm_tr_init_disas_context,
11179 .tb_start = arm_tr_tb_start,
11180 .insn_start = arm_tr_insn_start,
11181 .breakpoint_check = arm_tr_breakpoint_check,
11182 .translate_insn = arm_tr_translate_insn,
11183 .tb_stop = arm_tr_tb_stop,
11184 .disas_log = arm_tr_disas_log,
11187 static const TranslatorOps thumb_translator_ops = {
11188 .init_disas_context = arm_tr_init_disas_context,
11189 .tb_start = arm_tr_tb_start,
11190 .insn_start = arm_tr_insn_start,
11191 .breakpoint_check = arm_tr_breakpoint_check,
11192 .translate_insn = thumb_tr_translate_insn,
11193 .tb_stop = arm_tr_tb_stop,
11194 .disas_log = arm_tr_disas_log,
11197 /* generate intermediate code for basic block 'tb'. */
11198 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
11200 DisasContext dc = { };
11201 const TranslatorOps *ops = &arm_translator_ops;
11203 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
11204 ops = &thumb_translator_ops;
11206 #ifdef TARGET_AARCH64
11207 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11208 ops = &aarch64_translator_ops;
11210 #endif
11212 translator_loop(ops, &dc.base, cpu, tb, max_insns);
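/*
 * The three insn-start words recorded by arm_tr_insn_start() above are
 * what restore_state_to_opc() consumes: data[0] is the PC, data[1] the
 * packed condexec/IT bits, and data[2] the exception syndrome shifted
 * down by ARM_INSN_START_WORD2_SHIFT.
 */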
11215 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
11216 target_ulong *data)
11218 if (is_a64(env)) {
11219 env->pc = data[0];
11220 env->condexec_bits = 0;
11221 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
11222 } else {
11223 env->regs[15] = data[0];
11224 env->condexec_bits = data[1];
11225 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;