target/arm: Convert Neon 3-reg-diff long multiplies
[qemu/ar7.git] / target/arm/translate.c
1 /*
2 * ARM translation
4 * Copyright (c) 2003 Fabrice Bellard
5 * Copyright (c) 2005-2007 CodeSourcery
6 * Copyright (c) 2007 OpenedHand, Ltd.
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include "qemu/osdep.h"
23 #include "cpu.h"
24 #include "internals.h"
25 #include "disas/disas.h"
26 #include "exec/exec-all.h"
27 #include "tcg/tcg-op.h"
28 #include "tcg/tcg-op-gvec.h"
29 #include "qemu/log.h"
30 #include "qemu/bitops.h"
31 #include "arm_ldst.h"
32 #include "hw/semihosting/semihost.h"
34 #include "exec/helper-proto.h"
35 #include "exec/helper-gen.h"
37 #include "trace-tcg.h"
38 #include "exec/log.h"
41 #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
42 #define ENABLE_ARCH_5 arm_dc_feature(s, ARM_FEATURE_V5)
43 /* currently all emulated v5 cores are also v5TE, so don't bother */
44 #define ENABLE_ARCH_5TE arm_dc_feature(s, ARM_FEATURE_V5)
45 #define ENABLE_ARCH_5J dc_isar_feature(aa32_jazelle, s)
46 #define ENABLE_ARCH_6 arm_dc_feature(s, ARM_FEATURE_V6)
47 #define ENABLE_ARCH_6K arm_dc_feature(s, ARM_FEATURE_V6K)
48 #define ENABLE_ARCH_6T2 arm_dc_feature(s, ARM_FEATURE_THUMB2)
49 #define ENABLE_ARCH_7 arm_dc_feature(s, ARM_FEATURE_V7)
50 #define ENABLE_ARCH_8 arm_dc_feature(s, ARM_FEATURE_V8)
52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
54 #include "translate.h"
56 #if defined(CONFIG_USER_ONLY)
57 #define IS_USER(s) 1
58 #else
59 #define IS_USER(s) (s->user)
60 #endif
62 /* We reuse the same 64-bit temporaries for efficiency. */
63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64 static TCGv_i32 cpu_R[16];
65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66 TCGv_i64 cpu_exclusive_addr;
67 TCGv_i64 cpu_exclusive_val;
69 #include "exec/gen-icount.h"
71 static const char * const regnames[] =
72 { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
73 "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
75 /* Function prototypes for gen_ functions calling Neon helpers. */
76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
77 TCGv_i32, TCGv_i32);
 78 /* Function prototypes for gen_ functions for fixed point conversions */
79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
81 /* initialize TCG globals. */
82 void arm_translate_init(void)
84 int i;
86 for (i = 0; i < 16; i++) {
87 cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88 offsetof(CPUARMState, regs[i]),
89 regnames[i]);
91 cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92 cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93 cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94 cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
96 cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97 offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98 cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99 offsetof(CPUARMState, exclusive_val), "exclusive_val");
101 a64_translate_init();
104 /* Flags for the disas_set_da_iss info argument:
105 * lower bits hold the Rt register number, higher bits are flags.
107 typedef enum ISSInfo {
108 ISSNone = 0,
109 ISSRegMask = 0x1f,
110 ISSInvalid = (1 << 5),
111 ISSIsAcqRel = (1 << 6),
112 ISSIsWrite = (1 << 7),
113 ISSIs16Bit = (1 << 8),
114 } ISSInfo;
116 /* Save the syndrome information for a Data Abort */
117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
119 uint32_t syn;
120 int sas = memop & MO_SIZE;
121 bool sse = memop & MO_SIGN;
122 bool is_acqrel = issinfo & ISSIsAcqRel;
123 bool is_write = issinfo & ISSIsWrite;
124 bool is_16bit = issinfo & ISSIs16Bit;
125 int srt = issinfo & ISSRegMask;
127 if (issinfo & ISSInvalid) {
128 /* Some callsites want to conditionally provide ISS info,
129 * eg "only if this was not a writeback"
131 return;
134 if (srt == 15) {
135 /* For AArch32, insns where the src/dest is R15 never generate
136 * ISS information. Catching that here saves checking at all
137 * the call sites.
139 return;
142 syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
143 0, 0, 0, is_write, 0, is_16bit);
144 disas_set_insn_syndrome(s, syn);
147 static inline int get_a32_user_mem_index(DisasContext *s)
149 /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
150 * insns:
151 * if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152 * otherwise, access as if at PL0.
154 switch (s->mmu_idx) {
155 case ARMMMUIdx_E2: /* this one is UNPREDICTABLE */
156 case ARMMMUIdx_E10_0:
157 case ARMMMUIdx_E10_1:
158 case ARMMMUIdx_E10_1_PAN:
159 return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
160 case ARMMMUIdx_SE3:
161 case ARMMMUIdx_SE10_0:
162 case ARMMMUIdx_SE10_1:
163 case ARMMMUIdx_SE10_1_PAN:
164 return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
165 case ARMMMUIdx_MUser:
166 case ARMMMUIdx_MPriv:
167 return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
168 case ARMMMUIdx_MUserNegPri:
169 case ARMMMUIdx_MPrivNegPri:
170 return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
171 case ARMMMUIdx_MSUser:
172 case ARMMMUIdx_MSPriv:
173 return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
174 case ARMMMUIdx_MSUserNegPri:
175 case ARMMMUIdx_MSPrivNegPri:
176 return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
177 default:
178 g_assert_not_reached();
182 static inline TCGv_i32 load_cpu_offset(int offset)
184 TCGv_i32 tmp = tcg_temp_new_i32();
185 tcg_gen_ld_i32(tmp, cpu_env, offset);
186 return tmp;
189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
193 tcg_gen_st_i32(var, cpu_env, offset);
194 tcg_temp_free_i32(var);
197 #define store_cpu_field(var, name) \
198 store_cpu_offset(var, offsetof(CPUARMState, name))
200 /* The architectural value of PC. */
201 static uint32_t read_pc(DisasContext *s)
203 return s->pc_curr + (s->thumb ? 4 : 8);
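/*
 * Illustration: an AArch32 instruction that reads the PC observes its
 * own address plus 8 in ARM state or plus 4 in Thumb state.  For
 * example, an ARM-state "ADD r0, pc, #0" at 0x8000 writes 0x8008 to
 * r0, while the equivalent Thumb encoding writes 0x8004.
 */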
206 /* Set a variable to the value of a CPU register. */
207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
209 if (reg == 15) {
210 tcg_gen_movi_i32(var, read_pc(s));
211 } else {
212 tcg_gen_mov_i32(var, cpu_R[reg]);
216 /* Create a new temporary and set it to the value of a CPU register. */
217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
219 TCGv_i32 tmp = tcg_temp_new_i32();
220 load_reg_var(s, tmp, reg);
221 return tmp;
225 * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
226 * This is used for load/store for which use of PC implies (literal),
227 * or ADD that implies ADR.
229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
231 TCGv_i32 tmp = tcg_temp_new_i32();
233 if (reg == 15) {
234 tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
235 } else {
236 tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
238 return tmp;
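/*
 * Example: a Thumb LDR (literal) at 0x1002 sees an architectural PC of
 * 0x1006, which is aligned down to 0x1004 before the offset is added.
 * In ARM state the PC read value is already word aligned, so the
 * masking above only has a visible effect for Thumb encodings.
 */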
241 /* Set a CPU register. The source must be a temporary and will be
242 marked as dead. */
243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
245 if (reg == 15) {
246 /* In Thumb mode, we must ignore bit 0.
247 * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
248 * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
249 * We choose to ignore [1:0] in ARM mode for all architecture versions.
251 tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
252 s->base.is_jmp = DISAS_JUMP;
254 tcg_gen_mov_i32(cpu_R[reg], var);
255 tcg_temp_free_i32(var);
259 * Variant of store_reg which applies v8M stack-limit checks before updating
260 * SP. If the check fails this will result in an exception being taken.
261 * We disable the stack checks for CONFIG_USER_ONLY because we have
262 * no idea what the stack limits should be in that case.
263 * If stack checking is not being done this just acts like store_reg().
265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
267 #ifndef CONFIG_USER_ONLY
268 if (s->v8m_stackcheck) {
269 gen_helper_v8m_stackcheck(cpu_env, var);
271 #endif
272 store_reg(s, 13, var);
275 /* Value extensions. */
276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
287 TCGv_i32 tmp_mask = tcg_const_i32(mask);
288 gen_helper_cpsr_write(cpu_env, var, tmp_mask);
289 tcg_temp_free_i32(tmp_mask);
291 /* Set NZCV flags from the high 4 bits of var. */
292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
294 static void gen_exception_internal(int excp)
296 TCGv_i32 tcg_excp = tcg_const_i32(excp);
298 assert(excp_is_internal(excp));
299 gen_helper_exception_internal(cpu_env, tcg_excp);
300 tcg_temp_free_i32(tcg_excp);
303 static void gen_step_complete_exception(DisasContext *s)
 305 /* We just completed a step of an insn. Move from Active-not-pending
306 * to Active-pending, and then also take the swstep exception.
307 * This corresponds to making the (IMPDEF) choice to prioritize
308 * swstep exceptions over asynchronous exceptions taken to an exception
309 * level where debug is disabled. This choice has the advantage that
310 * we do not need to maintain internal state corresponding to the
311 * ISV/EX syndrome bits between completion of the step and generation
312 * of the exception, and our syndrome information is always correct.
314 gen_ss_advance(s);
315 gen_swstep_exception(s, 1, s->is_ldex);
316 s->base.is_jmp = DISAS_NORETURN;
319 static void gen_singlestep_exception(DisasContext *s)
321 /* Generate the right kind of exception for singlestep, which is
322 * either the architectural singlestep or EXCP_DEBUG for QEMU's
323 * gdb singlestepping.
325 if (s->ss_active) {
326 gen_step_complete_exception(s);
327 } else {
328 gen_exception_internal(EXCP_DEBUG);
332 static inline bool is_singlestepping(DisasContext *s)
334 /* Return true if we are singlestepping either because of
335 * architectural singlestep or QEMU gdbstub singlestep. This does
336 * not include the command line '-singlestep' mode which is rather
337 * misnamed as it only means "one instruction per TB" and doesn't
338 * affect the code we generate.
340 return s->base.singlestep_enabled || s->ss_active;
343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
345 TCGv_i32 tmp1 = tcg_temp_new_i32();
346 TCGv_i32 tmp2 = tcg_temp_new_i32();
347 tcg_gen_ext16s_i32(tmp1, a);
348 tcg_gen_ext16s_i32(tmp2, b);
349 tcg_gen_mul_i32(tmp1, tmp1, tmp2);
350 tcg_temp_free_i32(tmp2);
351 tcg_gen_sari_i32(a, a, 16);
352 tcg_gen_sari_i32(b, b, 16);
353 tcg_gen_mul_i32(b, b, a);
354 tcg_gen_mov_i32(a, tmp1);
355 tcg_temp_free_i32(tmp1);
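/*
 * On return, a holds the signed product of the two low halfwords and
 * b holds the signed product of the two high halfwords; callers in
 * the SMUAD/SMUSD/SMLAD family then add or subtract the two products
 * as the instruction requires.
 */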
358 /* Byteswap each halfword. */
359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
361 TCGv_i32 tmp = tcg_temp_new_i32();
362 TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
363 tcg_gen_shri_i32(tmp, var, 8);
364 tcg_gen_and_i32(tmp, tmp, mask);
365 tcg_gen_and_i32(var, var, mask);
366 tcg_gen_shli_i32(var, var, 8);
367 tcg_gen_or_i32(dest, var, tmp);
368 tcg_temp_free_i32(mask);
369 tcg_temp_free_i32(tmp);
372 /* Byteswap low halfword and sign extend. */
373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
375 tcg_gen_ext16u_i32(var, var);
376 tcg_gen_bswap16_i32(var, var);
377 tcg_gen_ext16s_i32(dest, var);
380 /* 32x32->64 multiply. Marks inputs as dead. */
381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
383 TCGv_i32 lo = tcg_temp_new_i32();
384 TCGv_i32 hi = tcg_temp_new_i32();
385 TCGv_i64 ret;
387 tcg_gen_mulu2_i32(lo, hi, a, b);
388 tcg_temp_free_i32(a);
389 tcg_temp_free_i32(b);
391 ret = tcg_temp_new_i64();
392 tcg_gen_concat_i32_i64(ret, lo, hi);
393 tcg_temp_free_i32(lo);
394 tcg_temp_free_i32(hi);
396 return ret;
399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
401 TCGv_i32 lo = tcg_temp_new_i32();
402 TCGv_i32 hi = tcg_temp_new_i32();
403 TCGv_i64 ret;
405 tcg_gen_muls2_i32(lo, hi, a, b);
406 tcg_temp_free_i32(a);
407 tcg_temp_free_i32(b);
409 ret = tcg_temp_new_i64();
410 tcg_gen_concat_i32_i64(ret, lo, hi);
411 tcg_temp_free_i32(lo);
412 tcg_temp_free_i32(hi);
414 return ret;
417 /* Swap low and high halfwords. */
418 static void gen_swap_half(TCGv_i32 var)
420 tcg_gen_rotri_i32(var, var, 16);
 423 /* Dual 16-bit add. Result is placed in dest; t0 and t1 are clobbered.
424 tmp = (t0 ^ t1) & 0x8000;
425 t0 &= ~0x8000;
426 t1 &= ~0x8000;
427 t0 = (t0 + t1) ^ tmp;
430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
432 TCGv_i32 tmp = tcg_temp_new_i32();
433 tcg_gen_xor_i32(tmp, t0, t1);
434 tcg_gen_andi_i32(tmp, tmp, 0x8000);
435 tcg_gen_andi_i32(t0, t0, ~0x8000);
436 tcg_gen_andi_i32(t1, t1, ~0x8000);
437 tcg_gen_add_i32(t0, t0, t1);
438 tcg_gen_xor_i32(dest, t0, tmp);
439 tcg_temp_free_i32(tmp);
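/*
 * Why the trick above works: clearing bit 15 of both operands means
 * the low-halfword addition cannot carry into bit 16, so the two
 * halfword sums stay independent.  Bit 15 of a sum is a15 ^ b15 ^
 * carry-in, and the masked add still produces the correct carry-in at
 * bit 15, so XORing the saved (a15 ^ b15) back in restores the true
 * bit 15 of the low halfword result.
 */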
442 /* Set N and Z flags from var. */
443 static inline void gen_logic_CC(TCGv_i32 var)
445 tcg_gen_mov_i32(cpu_NF, var);
446 tcg_gen_mov_i32(cpu_ZF, var);
449 /* dest = T0 + T1 + CF. */
450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
452 tcg_gen_add_i32(dest, t0, t1);
453 tcg_gen_add_i32(dest, dest, cpu_CF);
456 /* dest = T0 - T1 + CF - 1. */
457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
459 tcg_gen_sub_i32(dest, t0, t1);
460 tcg_gen_add_i32(dest, dest, cpu_CF);
461 tcg_gen_subi_i32(dest, dest, 1);
464 /* dest = T0 + T1. Compute C, N, V and Z flags */
465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
467 TCGv_i32 tmp = tcg_temp_new_i32();
468 tcg_gen_movi_i32(tmp, 0);
469 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
470 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
471 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
472 tcg_gen_xor_i32(tmp, t0, t1);
473 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
474 tcg_temp_free_i32(tmp);
475 tcg_gen_mov_i32(dest, cpu_NF);
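/*
 * Flag encoding used here: NF and ZF hold the 32-bit result (N is the
 * sign of NF, Z is "ZF == 0"), CF holds the carry out of bit 31, and
 * VF holds overflow in its sign bit.  Overflow occurs when both
 * operands have the same sign but the result does not, hence
 * (res ^ t0) & ~(t0 ^ t1).  For example 0x7fffffff + 1 = 0x80000000
 * sets V and leaves C clear.
 */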
478 /* dest = T0 + T1 + CF. Compute C, N, V and Z flags */
479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
481 TCGv_i32 tmp = tcg_temp_new_i32();
482 if (TCG_TARGET_HAS_add2_i32) {
483 tcg_gen_movi_i32(tmp, 0);
484 tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
485 tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
486 } else {
487 TCGv_i64 q0 = tcg_temp_new_i64();
488 TCGv_i64 q1 = tcg_temp_new_i64();
489 tcg_gen_extu_i32_i64(q0, t0);
490 tcg_gen_extu_i32_i64(q1, t1);
491 tcg_gen_add_i64(q0, q0, q1);
492 tcg_gen_extu_i32_i64(q1, cpu_CF);
493 tcg_gen_add_i64(q0, q0, q1);
494 tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
495 tcg_temp_free_i64(q0);
496 tcg_temp_free_i64(q1);
498 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
499 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
500 tcg_gen_xor_i32(tmp, t0, t1);
501 tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
502 tcg_temp_free_i32(tmp);
503 tcg_gen_mov_i32(dest, cpu_NF);
506 /* dest = T0 - T1. Compute C, N, V and Z flags */
507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
509 TCGv_i32 tmp;
510 tcg_gen_sub_i32(cpu_NF, t0, t1);
511 tcg_gen_mov_i32(cpu_ZF, cpu_NF);
512 tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
513 tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
514 tmp = tcg_temp_new_i32();
515 tcg_gen_xor_i32(tmp, t0, t1);
516 tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
517 tcg_temp_free_i32(tmp);
518 tcg_gen_mov_i32(dest, cpu_NF);
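/*
 * For subtraction the ARM C flag means "no borrow", i.e. C is set when
 * t0 >= t1 as unsigned values, which is exactly what the GEU setcond
 * computes.  Signed overflow occurs when the operands have different
 * signs and the result's sign differs from t0, hence
 * (res ^ t0) & (t0 ^ t1).  For example 0 - 1 = 0xffffffff with both C
 * and V clear.
 */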
521 /* dest = T0 + ~T1 + CF. Compute C, N, V and Z flags */
522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
524 TCGv_i32 tmp = tcg_temp_new_i32();
525 tcg_gen_not_i32(tmp, t1);
526 gen_adc_CC(dest, t0, tmp);
527 tcg_temp_free_i32(tmp);
530 #define GEN_SHIFT(name) \
531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
533 TCGv_i32 tmp1, tmp2, tmp3; \
534 tmp1 = tcg_temp_new_i32(); \
535 tcg_gen_andi_i32(tmp1, t1, 0xff); \
536 tmp2 = tcg_const_i32(0); \
537 tmp3 = tcg_const_i32(0x1f); \
538 tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
539 tcg_temp_free_i32(tmp3); \
540 tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
541 tcg_gen_##name##_i32(dest, tmp2, tmp1); \
542 tcg_temp_free_i32(tmp2); \
543 tcg_temp_free_i32(tmp1); \
545 GEN_SHIFT(shl)
546 GEN_SHIFT(shr)
547 #undef GEN_SHIFT
549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
551 TCGv_i32 tmp1, tmp2;
552 tmp1 = tcg_temp_new_i32();
553 tcg_gen_andi_i32(tmp1, t1, 0xff);
554 tmp2 = tcg_const_i32(0x1f);
555 tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
556 tcg_temp_free_i32(tmp2);
557 tcg_gen_sar_i32(dest, t0, tmp1);
558 tcg_temp_free_i32(tmp1);
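/*
 * Register-specified shifts only use the bottom byte of the shift
 * register.  The movcond in GEN_SHIFT makes LSL/LSR by more than 31
 * produce zero (the operand is replaced by 0 before shifting), while
 * gen_sar clamps the amount to 31 so shifts of 32 or more replicate
 * the sign bit, matching the ARM shift pseudocode.  The flag-setting
 * forms go through the *_cc helpers instead.
 */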
561 static void shifter_out_im(TCGv_i32 var, int shift)
563 tcg_gen_extract_i32(cpu_CF, var, shift, 1);
566 /* Shift by immediate. Includes special handling for shift == 0. */
567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
568 int shift, int flags)
570 switch (shiftop) {
571 case 0: /* LSL */
572 if (shift != 0) {
573 if (flags)
574 shifter_out_im(var, 32 - shift);
575 tcg_gen_shli_i32(var, var, shift);
577 break;
578 case 1: /* LSR */
579 if (shift == 0) {
580 if (flags) {
581 tcg_gen_shri_i32(cpu_CF, var, 31);
583 tcg_gen_movi_i32(var, 0);
584 } else {
585 if (flags)
586 shifter_out_im(var, shift - 1);
587 tcg_gen_shri_i32(var, var, shift);
589 break;
590 case 2: /* ASR */
591 if (shift == 0)
592 shift = 32;
593 if (flags)
594 shifter_out_im(var, shift - 1);
595 if (shift == 32)
596 shift = 31;
597 tcg_gen_sari_i32(var, var, shift);
598 break;
599 case 3: /* ROR/RRX */
600 if (shift != 0) {
601 if (flags)
602 shifter_out_im(var, shift - 1);
603 tcg_gen_rotri_i32(var, var, shift); break;
604 } else {
605 TCGv_i32 tmp = tcg_temp_new_i32();
606 tcg_gen_shli_i32(tmp, cpu_CF, 31);
607 if (flags)
608 shifter_out_im(var, 0);
609 tcg_gen_shri_i32(var, var, 1);
610 tcg_gen_or_i32(var, var, tmp);
611 tcg_temp_free_i32(tmp);
616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
617 TCGv_i32 shift, int flags)
619 if (flags) {
620 switch (shiftop) {
621 case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
622 case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
623 case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
624 case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
626 } else {
627 switch (shiftop) {
628 case 0:
629 gen_shl(var, var, shift);
630 break;
631 case 1:
632 gen_shr(var, var, shift);
633 break;
634 case 2:
635 gen_sar(var, var, shift);
636 break;
637 case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
638 tcg_gen_rotr_i32(var, var, shift); break;
641 tcg_temp_free_i32(shift);
645 * Generate a conditional based on ARM condition code cc.
 646 * This is common between ARM and AArch64 targets.
648 void arm_test_cc(DisasCompare *cmp, int cc)
650 TCGv_i32 value;
651 TCGCond cond;
652 bool global = true;
654 switch (cc) {
655 case 0: /* eq: Z */
656 case 1: /* ne: !Z */
657 cond = TCG_COND_EQ;
658 value = cpu_ZF;
659 break;
661 case 2: /* cs: C */
662 case 3: /* cc: !C */
663 cond = TCG_COND_NE;
664 value = cpu_CF;
665 break;
667 case 4: /* mi: N */
668 case 5: /* pl: !N */
669 cond = TCG_COND_LT;
670 value = cpu_NF;
671 break;
673 case 6: /* vs: V */
674 case 7: /* vc: !V */
675 cond = TCG_COND_LT;
676 value = cpu_VF;
677 break;
679 case 8: /* hi: C && !Z */
680 case 9: /* ls: !C || Z -> !(C && !Z) */
681 cond = TCG_COND_NE;
682 value = tcg_temp_new_i32();
683 global = false;
684 /* CF is 1 for C, so -CF is an all-bits-set mask for C;
685 ZF is non-zero for !Z; so AND the two subexpressions. */
686 tcg_gen_neg_i32(value, cpu_CF);
687 tcg_gen_and_i32(value, value, cpu_ZF);
688 break;
690 case 10: /* ge: N == V -> N ^ V == 0 */
691 case 11: /* lt: N != V -> N ^ V != 0 */
692 /* Since we're only interested in the sign bit, == 0 is >= 0. */
693 cond = TCG_COND_GE;
694 value = tcg_temp_new_i32();
695 global = false;
696 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
697 break;
699 case 12: /* gt: !Z && N == V */
700 case 13: /* le: Z || N != V */
701 cond = TCG_COND_NE;
702 value = tcg_temp_new_i32();
703 global = false;
704 /* (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate
705 * the sign bit then AND with ZF to yield the result. */
706 tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
707 tcg_gen_sari_i32(value, value, 31);
708 tcg_gen_andc_i32(value, cpu_ZF, value);
709 break;
711 case 14: /* always */
712 case 15: /* always */
713 /* Use the ALWAYS condition, which will fold early.
714 * It doesn't matter what we use for the value. */
715 cond = TCG_COND_ALWAYS;
716 value = cpu_ZF;
717 goto no_invert;
719 default:
720 fprintf(stderr, "Bad condition code 0x%x\n", cc);
721 abort();
724 if (cc & 1) {
725 cond = tcg_invert_cond(cond);
728 no_invert:
729 cmp->cond = cond;
730 cmp->value = value;
731 cmp->value_global = global;
734 void arm_free_cc(DisasCompare *cmp)
736 if (!cmp->value_global) {
737 tcg_temp_free_i32(cmp->value);
741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
743 tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
746 void arm_gen_test_cc(int cc, TCGLabel *label)
748 DisasCompare cmp;
749 arm_test_cc(&cmp, cc);
750 arm_jump_cc(&cmp, label);
751 arm_free_cc(&cmp);
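/*
 * Illustrative sketch of a typical caller: to make an instruction
 * conditional, branch past its generated body when the condition
 * fails by testing the inverted condition code:
 *
 *     TCGLabel *skip = gen_new_label();
 *     arm_gen_test_cc(cond ^ 1, skip);     (taken when cond is false)
 *     ...generate the conditional body...
 *     gen_set_label(skip);
 */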
754 static inline void gen_set_condexec(DisasContext *s)
756 if (s->condexec_mask) {
757 uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
758 TCGv_i32 tmp = tcg_temp_new_i32();
759 tcg_gen_movi_i32(tmp, val);
760 store_cpu_field(tmp, condexec_bits);
764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
766 tcg_gen_movi_i32(cpu_R[15], val);
769 /* Set PC and Thumb state from var. var is marked as dead. */
770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
772 s->base.is_jmp = DISAS_JUMP;
773 tcg_gen_andi_i32(cpu_R[15], var, ~1);
774 tcg_gen_andi_i32(var, var, 1);
775 store_cpu_field(var, thumb);
779 * Set PC and Thumb state from var. var is marked as dead.
780 * For M-profile CPUs, include logic to detect exception-return
781 * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
782 * and BX reg, and no others, and happens only for code in Handler mode.
783 * The Security Extension also requires us to check for the FNC_RETURN
784 * which signals a function return from non-secure state; this can happen
785 * in both Handler and Thread mode.
786 * To avoid having to do multiple comparisons in inline generated code,
787 * we make the check we do here loose, so it will match for EXC_RETURN
788 * in Thread mode. For system emulation do_v7m_exception_exit() checks
789 * for these spurious cases and returns without doing anything (giving
790 * the same behaviour as for a branch to a non-magic address).
792 * In linux-user mode it is unclear what the right behaviour for an
793 * attempted FNC_RETURN should be, because in real hardware this will go
794 * directly to Secure code (ie not the Linux kernel) which will then treat
795 * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
796 * attempt behave the way it would on a CPU without the security extension,
797 * which is to say "like a normal branch". That means we can simply treat
798 * all branches as normal with no magic address behaviour.
800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
802 /* Generate the same code here as for a simple bx, but flag via
803 * s->base.is_jmp that we need to do the rest of the work later.
805 gen_bx(s, var);
806 #ifndef CONFIG_USER_ONLY
807 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
808 (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
809 s->base.is_jmp = DISAS_BX_EXCRET;
811 #endif
814 static inline void gen_bx_excret_final_code(DisasContext *s)
816 /* Generate the code to finish possible exception return and end the TB */
817 TCGLabel *excret_label = gen_new_label();
818 uint32_t min_magic;
820 if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
821 /* Covers FNC_RETURN and EXC_RETURN magic */
822 min_magic = FNC_RETURN_MIN_MAGIC;
823 } else {
824 /* EXC_RETURN magic only */
825 min_magic = EXC_RETURN_MIN_MAGIC;
828 /* Is the new PC value in the magic range indicating exception return? */
829 tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
830 /* No: end the TB as we would for a DISAS_JMP */
831 if (is_singlestepping(s)) {
832 gen_singlestep_exception(s);
833 } else {
834 tcg_gen_exit_tb(NULL, 0);
836 gen_set_label(excret_label);
837 /* Yes: this is an exception return.
838 * At this point in runtime env->regs[15] and env->thumb will hold
839 * the exception-return magic number, which do_v7m_exception_exit()
840 * will read. Nothing else will be able to see those values because
841 * the cpu-exec main loop guarantees that we will always go straight
842 * from raising the exception to the exception-handling code.
844 * gen_ss_advance(s) does nothing on M profile currently but
845 * calling it is conceptually the right thing as we have executed
846 * this instruction (compare SWI, HVC, SMC handling).
848 gen_ss_advance(s);
849 gen_exception_internal(EXCP_EXCEPTION_EXIT);
852 static inline void gen_bxns(DisasContext *s, int rm)
854 TCGv_i32 var = load_reg(s, rm);
856 /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
857 * we need to sync state before calling it, but:
858 * - we don't need to do gen_set_pc_im() because the bxns helper will
859 * always set the PC itself
860 * - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
861 * unless it's outside an IT block or the last insn in an IT block,
862 * so we know that condexec == 0 (already set at the top of the TB)
863 * is correct in the non-UNPREDICTABLE cases, and we can choose
864 * "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
866 gen_helper_v7m_bxns(cpu_env, var);
867 tcg_temp_free_i32(var);
868 s->base.is_jmp = DISAS_EXIT;
871 static inline void gen_blxns(DisasContext *s, int rm)
873 TCGv_i32 var = load_reg(s, rm);
875 /* We don't need to sync condexec state, for the same reason as bxns.
876 * We do however need to set the PC, because the blxns helper reads it.
877 * The blxns helper may throw an exception.
879 gen_set_pc_im(s, s->base.pc_next);
880 gen_helper_v7m_blxns(cpu_env, var);
881 tcg_temp_free_i32(var);
882 s->base.is_jmp = DISAS_EXIT;
885 /* Variant of store_reg which uses branch&exchange logic when storing
886 to r15 in ARM architecture v7 and above. The source must be a temporary
887 and will be marked as dead. */
888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
890 if (reg == 15 && ENABLE_ARCH_7) {
891 gen_bx(s, var);
892 } else {
893 store_reg(s, reg, var);
897 /* Variant of store_reg which uses branch&exchange logic when storing
898 * to r15 in ARM architecture v5T and above. This is used for storing
899 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
900 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
903 if (reg == 15 && ENABLE_ARCH_5) {
904 gen_bx_excret(s, var);
905 } else {
906 store_reg(s, reg, var);
910 #ifdef CONFIG_USER_ONLY
911 #define IS_USER_ONLY 1
912 #else
913 #define IS_USER_ONLY 0
914 #endif
916 /* Abstractions of "generate code to do a guest load/store for
917 * AArch32", where a vaddr is always 32 bits (and is zero
918 * extended if we're a 64 bit core) and data is also
919 * 32 bits unless specifically doing a 64 bit access.
920 * These functions work like tcg_gen_qemu_{ld,st}* except
921 * that the address argument is TCGv_i32 rather than TCGv.
924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
926 TCGv addr = tcg_temp_new();
927 tcg_gen_extu_i32_tl(addr, a32);
929 /* Not needed for user-mode BE32, where we use MO_BE instead. */
930 if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
931 tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
933 return addr;
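/*
 * Worked example of the BE32 address adjustment above: with SCTLR.B
 * set, each word is held byte-reversed in memory, so a byte access at
 * address A is performed at A ^ 3 and a halfword access at A ^ 2,
 * while word accesses are unchanged.  64-bit accesses are handled by
 * rotating the data instead; see gen_aa32_frob64 below.
 */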
936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
937 int index, MemOp opc)
939 TCGv addr;
941 if (arm_dc_feature(s, ARM_FEATURE_M) &&
942 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
943 opc |= MO_ALIGN;
946 addr = gen_aa32_addr(s, a32, opc);
947 tcg_gen_qemu_ld_i32(val, addr, index, opc);
948 tcg_temp_free(addr);
951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
952 int index, MemOp opc)
954 TCGv addr;
956 if (arm_dc_feature(s, ARM_FEATURE_M) &&
957 !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
958 opc |= MO_ALIGN;
961 addr = gen_aa32_addr(s, a32, opc);
962 tcg_gen_qemu_st_i32(val, addr, index, opc);
963 tcg_temp_free(addr);
966 #define DO_GEN_LD(SUFF, OPC) \
967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val, \
968 TCGv_i32 a32, int index) \
970 gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data); \
973 #define DO_GEN_ST(SUFF, OPC) \
974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val, \
975 TCGv_i32 a32, int index) \
977 gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data); \
980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
982 /* Not needed for user-mode BE32, where we use MO_BE instead. */
983 if (!IS_USER_ONLY && s->sctlr_b) {
984 tcg_gen_rotri_i64(val, val, 32);
988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
989 int index, MemOp opc)
991 TCGv addr = gen_aa32_addr(s, a32, opc);
992 tcg_gen_qemu_ld_i64(val, addr, index, opc);
993 gen_aa32_frob64(s, val);
994 tcg_temp_free(addr);
997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
998 TCGv_i32 a32, int index)
1000 gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004 int index, MemOp opc)
1006 TCGv addr = gen_aa32_addr(s, a32, opc);
1008 /* Not needed for user-mode BE32, where we use MO_BE instead. */
1009 if (!IS_USER_ONLY && s->sctlr_b) {
1010 TCGv_i64 tmp = tcg_temp_new_i64();
1011 tcg_gen_rotri_i64(tmp, val, 32);
1012 tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013 tcg_temp_free_i64(tmp);
1014 } else {
1015 tcg_gen_qemu_st_i64(val, addr, index, opc);
1017 tcg_temp_free(addr);
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021 TCGv_i32 a32, int index)
1023 gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1035 /* The pre HVC helper handles cases when HVC gets trapped
1036 * as an undefined insn by runtime configuration (ie before
1037 * the insn really executes).
1039 gen_set_pc_im(s, s->pc_curr);
1040 gen_helper_pre_hvc(cpu_env);
1041 /* Otherwise we will treat this as a real exception which
1042 * happens after execution of the insn. (The distinction matters
1043 * for the PC value reported to the exception handler and also
1044 * for single stepping.)
1046 s->svc_imm = imm16;
1047 gen_set_pc_im(s, s->base.pc_next);
1048 s->base.is_jmp = DISAS_HVC;
1051 static inline void gen_smc(DisasContext *s)
1053 /* As with HVC, we may take an exception either before or after
1054 * the insn executes.
1056 TCGv_i32 tmp;
1058 gen_set_pc_im(s, s->pc_curr);
1059 tmp = tcg_const_i32(syn_aa32_smc());
1060 gen_helper_pre_smc(cpu_env, tmp);
1061 tcg_temp_free_i32(tmp);
1062 gen_set_pc_im(s, s->base.pc_next);
1063 s->base.is_jmp = DISAS_SMC;
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1068 gen_set_condexec(s);
1069 gen_set_pc_im(s, pc);
1070 gen_exception_internal(excp);
1071 s->base.is_jmp = DISAS_NORETURN;
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075 int syn, uint32_t target_el)
1077 gen_set_condexec(s);
1078 gen_set_pc_im(s, pc);
1079 gen_exception(excp, syn, target_el);
1080 s->base.is_jmp = DISAS_NORETURN;
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1085 TCGv_i32 tcg_syn;
1087 gen_set_condexec(s);
1088 gen_set_pc_im(s, s->pc_curr);
1089 tcg_syn = tcg_const_i32(syn);
1090 gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091 tcg_temp_free_i32(tcg_syn);
1092 s->base.is_jmp = DISAS_NORETURN;
1095 static void unallocated_encoding(DisasContext *s)
1097 /* Unallocated and reserved encodings are uncategorized */
1098 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099 default_exception_el(s));
1102 /* Force a TB lookup after an instruction that changes the CPU state. */
1103 static inline void gen_lookup_tb(DisasContext *s)
1105 tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106 s->base.is_jmp = DISAS_EXIT;
1109 static inline void gen_hlt(DisasContext *s, int imm)
1111 /* HLT. This has two purposes.
1112 * Architecturally, it is an external halting debug instruction.
1113 * Since QEMU doesn't implement external debug, we treat this as
1114 * the architecture requires when halting debug is disabled: it will UNDEF.
1115 * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116 * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117 * must trigger semihosting even for ARMv7 and earlier, where
1118 * HLT was an undefined encoding.
1119 * In system mode, we don't allow userspace access to
1120 * semihosting, to provide some semblance of security
1121 * (and for consistency with our 32-bit semihosting).
1123 if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125 s->current_el != 0 &&
1126 #endif
1127 (imm == (s->thumb ? 0x3c : 0xf000))) {
1128 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129 return;
1132 unallocated_encoding(s);
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1137 TCGv_ptr statusptr = tcg_temp_new_ptr();
1138 int offset;
1139 if (neon) {
1140 offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141 } else {
1142 offset = offsetof(CPUARMState, vfp.fp_status);
1144 tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145 return statusptr;
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1150 if (dp) {
1151 return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152 } else {
1153 long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154 if (reg & 1) {
1155 ofs += offsetof(CPU_DoubleU, l.upper);
1156 } else {
1157 ofs += offsetof(CPU_DoubleU, l.lower);
1159 return ofs;
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164 zero is the least significant end of the register. */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1168 int sreg;
1169 sreg = reg * 2 + n;
1170 return vfp_reg_offset(0, sreg);
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174 * where 0 is the least significant end of the register.
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1179 int element_size = 1 << size;
1180 int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182 /* Calculate the offset assuming fully little-endian,
1183 * then XOR to account for the order of the 8-byte units.
1185 if (element_size < 8) {
1186 ofs ^= 8 - element_size;
1188 #endif
1189 return neon_reg_offset(reg, 0) + ofs;
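/*
 * Example of the big-endian-host adjustment above: with byte-sized
 * elements, element 0 lives at byte offset 7 of its 8-byte unit on a
 * big-endian host, element 1 at offset 6, and so on.  XORing the
 * little-endian offset with (8 - element_size) produces exactly that
 * mapping while leaving whole 64-bit elements untouched.
 */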
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1194 TCGv_i32 tmp = tcg_temp_new_i32();
1195 tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196 return tmp;
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1201 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1203 switch (mop) {
1204 case MO_UB:
1205 tcg_gen_ld8u_i32(var, cpu_env, offset);
1206 break;
1207 case MO_UW:
1208 tcg_gen_ld16u_i32(var, cpu_env, offset);
1209 break;
1210 case MO_UL:
1211 tcg_gen_ld_i32(var, cpu_env, offset);
1212 break;
1213 default:
1214 g_assert_not_reached();
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1220 long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1222 switch (mop) {
1223 case MO_UB:
1224 tcg_gen_ld8u_i64(var, cpu_env, offset);
1225 break;
1226 case MO_UW:
1227 tcg_gen_ld16u_i64(var, cpu_env, offset);
1228 break;
1229 case MO_UL:
1230 tcg_gen_ld32u_i64(var, cpu_env, offset);
1231 break;
1232 case MO_Q:
1233 tcg_gen_ld_i64(var, cpu_env, offset);
1234 break;
1235 default:
1236 g_assert_not_reached();
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1242 tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243 tcg_temp_free_i32(var);
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1248 long offset = neon_element_offset(reg, ele, size);
1250 switch (size) {
1251 case MO_8:
1252 tcg_gen_st8_i32(var, cpu_env, offset);
1253 break;
1254 case MO_16:
1255 tcg_gen_st16_i32(var, cpu_env, offset);
1256 break;
1257 case MO_32:
1258 tcg_gen_st_i32(var, cpu_env, offset);
1259 break;
1260 default:
1261 g_assert_not_reached();
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1267 long offset = neon_element_offset(reg, ele, size);
1269 switch (size) {
1270 case MO_8:
1271 tcg_gen_st8_i64(var, cpu_env, offset);
1272 break;
1273 case MO_16:
1274 tcg_gen_st16_i64(var, cpu_env, offset);
1275 break;
1276 case MO_32:
1277 tcg_gen_st32_i64(var, cpu_env, offset);
1278 break;
1279 case MO_64:
1280 tcg_gen_st_i64(var, cpu_env, offset);
1281 break;
1282 default:
1283 g_assert_not_reached();
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1289 tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1294 tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1299 tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1304 tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1309 TCGv_ptr ret = tcg_temp_new_ptr();
1310 tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311 return ret;
1314 #define ARM_CP_RW_BIT (1 << 20)
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1322 tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1327 tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1332 TCGv_i32 var = tcg_temp_new_i32();
1333 tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334 return var;
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1339 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340 tcg_temp_free_i32(var);
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1345 iwmmxt_store_reg(cpu_M0, rn);
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1350 iwmmxt_load_reg(cpu_M0, rn);
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1355 iwmmxt_load_reg(cpu_V1, rn);
1356 tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1361 iwmmxt_load_reg(cpu_V1, rn);
1362 tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1367 iwmmxt_load_reg(cpu_V1, rn);
1368 tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1374 iwmmxt_load_reg(cpu_V1, rn); \
1375 gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1381 iwmmxt_load_reg(cpu_V1, rn); \
1382 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1393 gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
1451 static void gen_op_iwmmxt_set_mup(void)
1453 TCGv_i32 tmp;
1454 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455 tcg_gen_ori_i32(tmp, tmp, 2);
1456 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1459 static void gen_op_iwmmxt_set_cup(void)
1461 TCGv_i32 tmp;
1462 tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463 tcg_gen_ori_i32(tmp, tmp, 1);
1464 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1469 TCGv_i32 tmp = tcg_temp_new_i32();
1470 gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471 store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1476 iwmmxt_load_reg(cpu_V1, rn);
1477 tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482 TCGv_i32 dest)
1484 int rd;
1485 uint32_t offset;
1486 TCGv_i32 tmp;
1488 rd = (insn >> 16) & 0xf;
1489 tmp = load_reg(s, rd);
1491 offset = (insn & 0xff) << ((insn >> 7) & 2);
1492 if (insn & (1 << 24)) {
1493 /* Pre indexed */
1494 if (insn & (1 << 23))
1495 tcg_gen_addi_i32(tmp, tmp, offset);
1496 else
1497 tcg_gen_addi_i32(tmp, tmp, -offset);
1498 tcg_gen_mov_i32(dest, tmp);
1499 if (insn & (1 << 21))
1500 store_reg(s, rd, tmp);
1501 else
1502 tcg_temp_free_i32(tmp);
1503 } else if (insn & (1 << 21)) {
1504 /* Post indexed */
1505 tcg_gen_mov_i32(dest, tmp);
1506 if (insn & (1 << 23))
1507 tcg_gen_addi_i32(tmp, tmp, offset);
1508 else
1509 tcg_gen_addi_i32(tmp, tmp, -offset);
1510 store_reg(s, rd, tmp);
1511 } else if (!(insn & (1 << 23)))
1512 return 1;
1513 return 0;
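/*
 * Addressing-mode summary for the decode above: the 8-bit immediate is
 * scaled by 4 when bit 8 of the insn is set.  Bit 24 selects
 * pre-indexed addressing (bit 21 then enables writeback), bit 21 alone
 * selects post-indexed addressing, and bit 23 chooses whether the
 * offset is added or subtracted; the remaining "no index, subtract"
 * form is rejected as an invalid encoding.
 */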
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1518 int rd = (insn >> 0) & 0xf;
1519 TCGv_i32 tmp;
1521 if (insn & (1 << 8)) {
1522 if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523 return 1;
1524 } else {
1525 tmp = iwmmxt_load_creg(rd);
1527 } else {
1528 tmp = tcg_temp_new_i32();
1529 iwmmxt_load_reg(cpu_V0, rd);
1530 tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1532 tcg_gen_andi_i32(tmp, tmp, mask);
1533 tcg_gen_mov_i32(dest, tmp);
1534 tcg_temp_free_i32(tmp);
1535 return 0;
1538 /* Disassemble an iwMMXt instruction. Returns nonzero if an error occurred
1539 (ie. an undefined instruction). */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1542 int rd, wrd;
1543 int rdhi, rdlo, rd0, rd1, i;
1544 TCGv_i32 addr;
1545 TCGv_i32 tmp, tmp2, tmp3;
1547 if ((insn & 0x0e000e00) == 0x0c000000) {
1548 if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549 wrd = insn & 0xf;
1550 rdlo = (insn >> 12) & 0xf;
1551 rdhi = (insn >> 16) & 0xf;
1552 if (insn & ARM_CP_RW_BIT) { /* TMRRC */
1553 iwmmxt_load_reg(cpu_V0, wrd);
1554 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556 } else { /* TMCRR */
1557 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558 iwmmxt_store_reg(cpu_V0, wrd);
1559 gen_op_iwmmxt_set_mup();
1561 return 0;
1564 wrd = (insn >> 12) & 0xf;
1565 addr = tcg_temp_new_i32();
1566 if (gen_iwmmxt_address(s, insn, addr)) {
1567 tcg_temp_free_i32(addr);
1568 return 1;
1570 if (insn & ARM_CP_RW_BIT) {
1571 if ((insn >> 28) == 0xf) { /* WLDRW wCx */
1572 tmp = tcg_temp_new_i32();
1573 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574 iwmmxt_store_creg(wrd, tmp);
1575 } else {
1576 i = 1;
1577 if (insn & (1 << 8)) {
1578 if (insn & (1 << 22)) { /* WLDRD */
1579 gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580 i = 0;
1581 } else { /* WLDRW wRd */
1582 tmp = tcg_temp_new_i32();
1583 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1585 } else {
1586 tmp = tcg_temp_new_i32();
1587 if (insn & (1 << 22)) { /* WLDRH */
1588 gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589 } else { /* WLDRB */
1590 gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1593 if (i) {
1594 tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595 tcg_temp_free_i32(tmp);
1597 gen_op_iwmmxt_movq_wRn_M0(wrd);
1599 } else {
1600 if ((insn >> 28) == 0xf) { /* WSTRW wCx */
1601 tmp = iwmmxt_load_creg(wrd);
1602 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603 } else {
1604 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605 tmp = tcg_temp_new_i32();
1606 if (insn & (1 << 8)) {
1607 if (insn & (1 << 22)) { /* WSTRD */
1608 gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609 } else { /* WSTRW wRd */
1610 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1613 } else {
1614 if (insn & (1 << 22)) { /* WSTRH */
1615 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616 gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617 } else { /* WSTRB */
1618 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619 gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1623 tcg_temp_free_i32(tmp);
1625 tcg_temp_free_i32(addr);
1626 return 0;
1629 if ((insn & 0x0f000000) != 0x0e000000)
1630 return 1;
1632 switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633 case 0x000: /* WOR */
1634 wrd = (insn >> 12) & 0xf;
1635 rd0 = (insn >> 0) & 0xf;
1636 rd1 = (insn >> 16) & 0xf;
1637 gen_op_iwmmxt_movq_M0_wRn(rd0);
1638 gen_op_iwmmxt_orq_M0_wRn(rd1);
1639 gen_op_iwmmxt_setpsr_nz();
1640 gen_op_iwmmxt_movq_wRn_M0(wrd);
1641 gen_op_iwmmxt_set_mup();
1642 gen_op_iwmmxt_set_cup();
1643 break;
1644 case 0x011: /* TMCR */
1645 if (insn & 0xf)
1646 return 1;
1647 rd = (insn >> 12) & 0xf;
1648 wrd = (insn >> 16) & 0xf;
1649 switch (wrd) {
1650 case ARM_IWMMXT_wCID:
1651 case ARM_IWMMXT_wCASF:
1652 break;
1653 case ARM_IWMMXT_wCon:
1654 gen_op_iwmmxt_set_cup();
1655 /* Fall through. */
1656 case ARM_IWMMXT_wCSSF:
1657 tmp = iwmmxt_load_creg(wrd);
1658 tmp2 = load_reg(s, rd);
1659 tcg_gen_andc_i32(tmp, tmp, tmp2);
1660 tcg_temp_free_i32(tmp2);
1661 iwmmxt_store_creg(wrd, tmp);
1662 break;
1663 case ARM_IWMMXT_wCGR0:
1664 case ARM_IWMMXT_wCGR1:
1665 case ARM_IWMMXT_wCGR2:
1666 case ARM_IWMMXT_wCGR3:
1667 gen_op_iwmmxt_set_cup();
1668 tmp = load_reg(s, rd);
1669 iwmmxt_store_creg(wrd, tmp);
1670 break;
1671 default:
1672 return 1;
1674 break;
1675 case 0x100: /* WXOR */
1676 wrd = (insn >> 12) & 0xf;
1677 rd0 = (insn >> 0) & 0xf;
1678 rd1 = (insn >> 16) & 0xf;
1679 gen_op_iwmmxt_movq_M0_wRn(rd0);
1680 gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681 gen_op_iwmmxt_setpsr_nz();
1682 gen_op_iwmmxt_movq_wRn_M0(wrd);
1683 gen_op_iwmmxt_set_mup();
1684 gen_op_iwmmxt_set_cup();
1685 break;
1686 case 0x111: /* TMRC */
1687 if (insn & 0xf)
1688 return 1;
1689 rd = (insn >> 12) & 0xf;
1690 wrd = (insn >> 16) & 0xf;
1691 tmp = iwmmxt_load_creg(wrd);
1692 store_reg(s, rd, tmp);
1693 break;
1694 case 0x300: /* WANDN */
1695 wrd = (insn >> 12) & 0xf;
1696 rd0 = (insn >> 0) & 0xf;
1697 rd1 = (insn >> 16) & 0xf;
1698 gen_op_iwmmxt_movq_M0_wRn(rd0);
1699 tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700 gen_op_iwmmxt_andq_M0_wRn(rd1);
1701 gen_op_iwmmxt_setpsr_nz();
1702 gen_op_iwmmxt_movq_wRn_M0(wrd);
1703 gen_op_iwmmxt_set_mup();
1704 gen_op_iwmmxt_set_cup();
1705 break;
1706 case 0x200: /* WAND */
1707 wrd = (insn >> 12) & 0xf;
1708 rd0 = (insn >> 0) & 0xf;
1709 rd1 = (insn >> 16) & 0xf;
1710 gen_op_iwmmxt_movq_M0_wRn(rd0);
1711 gen_op_iwmmxt_andq_M0_wRn(rd1);
1712 gen_op_iwmmxt_setpsr_nz();
1713 gen_op_iwmmxt_movq_wRn_M0(wrd);
1714 gen_op_iwmmxt_set_mup();
1715 gen_op_iwmmxt_set_cup();
1716 break;
1717 case 0x810: case 0xa10: /* WMADD */
1718 wrd = (insn >> 12) & 0xf;
1719 rd0 = (insn >> 0) & 0xf;
1720 rd1 = (insn >> 16) & 0xf;
1721 gen_op_iwmmxt_movq_M0_wRn(rd0);
1722 if (insn & (1 << 21))
1723 gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724 else
1725 gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726 gen_op_iwmmxt_movq_wRn_M0(wrd);
1727 gen_op_iwmmxt_set_mup();
1728 break;
1729 case 0x10e: case 0x50e: case 0x90e: case 0xd0e: /* WUNPCKIL */
1730 wrd = (insn >> 12) & 0xf;
1731 rd0 = (insn >> 16) & 0xf;
1732 rd1 = (insn >> 0) & 0xf;
1733 gen_op_iwmmxt_movq_M0_wRn(rd0);
1734 switch ((insn >> 22) & 3) {
1735 case 0:
1736 gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737 break;
1738 case 1:
1739 gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740 break;
1741 case 2:
1742 gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743 break;
1744 case 3:
1745 return 1;
1747 gen_op_iwmmxt_movq_wRn_M0(wrd);
1748 gen_op_iwmmxt_set_mup();
1749 gen_op_iwmmxt_set_cup();
1750 break;
1751 case 0x10c: case 0x50c: case 0x90c: case 0xd0c: /* WUNPCKIH */
1752 wrd = (insn >> 12) & 0xf;
1753 rd0 = (insn >> 16) & 0xf;
1754 rd1 = (insn >> 0) & 0xf;
1755 gen_op_iwmmxt_movq_M0_wRn(rd0);
1756 switch ((insn >> 22) & 3) {
1757 case 0:
1758 gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759 break;
1760 case 1:
1761 gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762 break;
1763 case 2:
1764 gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765 break;
1766 case 3:
1767 return 1;
1769 gen_op_iwmmxt_movq_wRn_M0(wrd);
1770 gen_op_iwmmxt_set_mup();
1771 gen_op_iwmmxt_set_cup();
1772 break;
1773 case 0x012: case 0x112: case 0x412: case 0x512: /* WSAD */
1774 wrd = (insn >> 12) & 0xf;
1775 rd0 = (insn >> 16) & 0xf;
1776 rd1 = (insn >> 0) & 0xf;
1777 gen_op_iwmmxt_movq_M0_wRn(rd0);
1778 if (insn & (1 << 22))
1779 gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780 else
1781 gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782 if (!(insn & (1 << 20)))
1783 gen_op_iwmmxt_addl_M0_wRn(wrd);
1784 gen_op_iwmmxt_movq_wRn_M0(wrd);
1785 gen_op_iwmmxt_set_mup();
1786 break;
1787 case 0x010: case 0x110: case 0x210: case 0x310: /* WMUL */
1788 wrd = (insn >> 12) & 0xf;
1789 rd0 = (insn >> 16) & 0xf;
1790 rd1 = (insn >> 0) & 0xf;
1791 gen_op_iwmmxt_movq_M0_wRn(rd0);
1792 if (insn & (1 << 21)) {
1793 if (insn & (1 << 20))
1794 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795 else
1796 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797 } else {
1798 if (insn & (1 << 20))
1799 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800 else
1801 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1803 gen_op_iwmmxt_movq_wRn_M0(wrd);
1804 gen_op_iwmmxt_set_mup();
1805 break;
1806 case 0x410: case 0x510: case 0x610: case 0x710: /* WMAC */
1807 wrd = (insn >> 12) & 0xf;
1808 rd0 = (insn >> 16) & 0xf;
1809 rd1 = (insn >> 0) & 0xf;
1810 gen_op_iwmmxt_movq_M0_wRn(rd0);
1811 if (insn & (1 << 21))
1812 gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813 else
1814 gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815 if (!(insn & (1 << 20))) {
1816 iwmmxt_load_reg(cpu_V1, wrd);
1817 tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1819 gen_op_iwmmxt_movq_wRn_M0(wrd);
1820 gen_op_iwmmxt_set_mup();
1821 break;
1822 case 0x006: case 0x406: case 0x806: case 0xc06: /* WCMPEQ */
1823 wrd = (insn >> 12) & 0xf;
1824 rd0 = (insn >> 16) & 0xf;
1825 rd1 = (insn >> 0) & 0xf;
1826 gen_op_iwmmxt_movq_M0_wRn(rd0);
1827 switch ((insn >> 22) & 3) {
1828 case 0:
1829 gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830 break;
1831 case 1:
1832 gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833 break;
1834 case 2:
1835 gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836 break;
1837 case 3:
1838 return 1;
1840 gen_op_iwmmxt_movq_wRn_M0(wrd);
1841 gen_op_iwmmxt_set_mup();
1842 gen_op_iwmmxt_set_cup();
1843 break;
1844 case 0x800: case 0x900: case 0xc00: case 0xd00: /* WAVG2 */
1845 wrd = (insn >> 12) & 0xf;
1846 rd0 = (insn >> 16) & 0xf;
1847 rd1 = (insn >> 0) & 0xf;
1848 gen_op_iwmmxt_movq_M0_wRn(rd0);
1849 if (insn & (1 << 22)) {
1850 if (insn & (1 << 20))
1851 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852 else
1853 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854 } else {
1855 if (insn & (1 << 20))
1856 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857 else
1858 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1860 gen_op_iwmmxt_movq_wRn_M0(wrd);
1861 gen_op_iwmmxt_set_mup();
1862 gen_op_iwmmxt_set_cup();
1863 break;
1864 case 0x802: case 0x902: case 0xa02: case 0xb02: /* WALIGNR */
1865 wrd = (insn >> 12) & 0xf;
1866 rd0 = (insn >> 16) & 0xf;
1867 rd1 = (insn >> 0) & 0xf;
1868 gen_op_iwmmxt_movq_M0_wRn(rd0);
1869 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870 tcg_gen_andi_i32(tmp, tmp, 7);
1871 iwmmxt_load_reg(cpu_V1, rd1);
1872 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873 tcg_temp_free_i32(tmp);
1874 gen_op_iwmmxt_movq_wRn_M0(wrd);
1875 gen_op_iwmmxt_set_mup();
1876 break;
1877 case 0x601: case 0x605: case 0x609: case 0x60d: /* TINSR */
1878 if (((insn >> 6) & 3) == 3)
1879 return 1;
1880 rd = (insn >> 12) & 0xf;
1881 wrd = (insn >> 16) & 0xf;
1882 tmp = load_reg(s, rd);
1883 gen_op_iwmmxt_movq_M0_wRn(wrd);
1884 switch ((insn >> 6) & 3) {
1885 case 0:
1886 tmp2 = tcg_const_i32(0xff);
1887 tmp3 = tcg_const_i32((insn & 7) << 3);
1888 break;
1889 case 1:
1890 tmp2 = tcg_const_i32(0xffff);
1891 tmp3 = tcg_const_i32((insn & 3) << 4);
1892 break;
1893 case 2:
1894 tmp2 = tcg_const_i32(0xffffffff);
1895 tmp3 = tcg_const_i32((insn & 1) << 5);
1896 break;
1897 default:
1898 tmp2 = NULL;
1899 tmp3 = NULL;
1901 gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902 tcg_temp_free_i32(tmp3);
1903 tcg_temp_free_i32(tmp2);
1904 tcg_temp_free_i32(tmp);
1905 gen_op_iwmmxt_movq_wRn_M0(wrd);
1906 gen_op_iwmmxt_set_mup();
1907 break;
1908 case 0x107: case 0x507: case 0x907: case 0xd07: /* TEXTRM */
1909 rd = (insn >> 12) & 0xf;
1910 wrd = (insn >> 16) & 0xf;
1911 if (rd == 15 || ((insn >> 22) & 3) == 3)
1912 return 1;
1913 gen_op_iwmmxt_movq_M0_wRn(wrd);
1914 tmp = tcg_temp_new_i32();
1915 switch ((insn >> 22) & 3) {
1916 case 0:
1917 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919 if (insn & 8) {
1920 tcg_gen_ext8s_i32(tmp, tmp);
1921 } else {
1922 tcg_gen_andi_i32(tmp, tmp, 0xff);
1924 break;
1925 case 1:
1926 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928 if (insn & 8) {
1929 tcg_gen_ext16s_i32(tmp, tmp);
1930 } else {
1931 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1933 break;
1934 case 2:
1935 tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936 tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937 break;
1939 store_reg(s, rd, tmp);
1940 break;
1941 case 0x117: case 0x517: case 0x917: case 0xd17: /* TEXTRC */
1942 if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943 return 1;
1944 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945 switch ((insn >> 22) & 3) {
1946 case 0:
1947 tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948 break;
1949 case 1:
1950 tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951 break;
1952 case 2:
1953 tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954 break;
1956 tcg_gen_shli_i32(tmp, tmp, 28);
1957 gen_set_nzcv(tmp);
1958 tcg_temp_free_i32(tmp);
1959 break;
1960 case 0x401: case 0x405: case 0x409: case 0x40d: /* TBCST */
1961 if (((insn >> 6) & 3) == 3)
1962 return 1;
1963 rd = (insn >> 12) & 0xf;
1964 wrd = (insn >> 16) & 0xf;
1965 tmp = load_reg(s, rd);
1966 switch ((insn >> 6) & 3) {
1967 case 0:
1968 gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969 break;
1970 case 1:
1971 gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972 break;
1973 case 2:
1974 gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975 break;
1977 tcg_temp_free_i32(tmp);
1978 gen_op_iwmmxt_movq_wRn_M0(wrd);
1979 gen_op_iwmmxt_set_mup();
1980 break;
1981 case 0x113: case 0x513: case 0x913: case 0xd13: /* TANDC */
1982 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983 return 1;
1984 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985 tmp2 = tcg_temp_new_i32();
1986 tcg_gen_mov_i32(tmp2, tmp);
1987 switch ((insn >> 22) & 3) {
1988 case 0:
1989 for (i = 0; i < 7; i ++) {
1990 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991 tcg_gen_and_i32(tmp, tmp, tmp2);
1993 break;
1994 case 1:
1995 for (i = 0; i < 3; i ++) {
1996 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997 tcg_gen_and_i32(tmp, tmp, tmp2);
1999 break;
2000 case 2:
2001 tcg_gen_shli_i32(tmp2, tmp2, 16);
2002 tcg_gen_and_i32(tmp, tmp, tmp2);
2003 break;
2005 gen_set_nzcv(tmp);
2006 tcg_temp_free_i32(tmp2);
2007 tcg_temp_free_i32(tmp);
2008 break;
2009 case 0x01c: case 0x41c: case 0x81c: case 0xc1c: /* WACC */
2010 wrd = (insn >> 12) & 0xf;
2011 rd0 = (insn >> 16) & 0xf;
2012 gen_op_iwmmxt_movq_M0_wRn(rd0);
2013 switch ((insn >> 22) & 3) {
2014 case 0:
2015 gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016 break;
2017 case 1:
2018 gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019 break;
2020 case 2:
2021 gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022 break;
2023 case 3:
2024 return 1;
2026 gen_op_iwmmxt_movq_wRn_M0(wrd);
2027 gen_op_iwmmxt_set_mup();
2028 break;
2029 case 0x115: case 0x515: case 0x915: case 0xd15: /* TORC */
2030 if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031 return 1;
2032 tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033 tmp2 = tcg_temp_new_i32();
2034 tcg_gen_mov_i32(tmp2, tmp);
2035 switch ((insn >> 22) & 3) {
2036 case 0:
2037 for (i = 0; i < 7; i ++) {
2038 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039 tcg_gen_or_i32(tmp, tmp, tmp2);
2041 break;
2042 case 1:
2043 for (i = 0; i < 3; i ++) {
2044 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045 tcg_gen_or_i32(tmp, tmp, tmp2);
2047 break;
2048 case 2:
2049 tcg_gen_shli_i32(tmp2, tmp2, 16);
2050 tcg_gen_or_i32(tmp, tmp, tmp2);
2051 break;
2053 gen_set_nzcv(tmp);
2054 tcg_temp_free_i32(tmp2);
2055 tcg_temp_free_i32(tmp);
2056 break;
2057 case 0x103: case 0x503: case 0x903: case 0xd03: /* TMOVMSK */
2058 rd = (insn >> 12) & 0xf;
2059 rd0 = (insn >> 16) & 0xf;
2060 if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061 return 1;
2062 gen_op_iwmmxt_movq_M0_wRn(rd0);
2063 tmp = tcg_temp_new_i32();
2064 switch ((insn >> 22) & 3) {
2065 case 0:
2066 gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067 break;
2068 case 1:
2069 gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070 break;
2071 case 2:
2072 gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073 break;
2075 store_reg(s, rd, tmp);
2076 break;
2077 case 0x106: case 0x306: case 0x506: case 0x706: /* WCMPGT */
2078 case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079 wrd = (insn >> 12) & 0xf;
2080 rd0 = (insn >> 16) & 0xf;
2081 rd1 = (insn >> 0) & 0xf;
2082 gen_op_iwmmxt_movq_M0_wRn(rd0);
2083 switch ((insn >> 22) & 3) {
2084 case 0:
2085 if (insn & (1 << 21))
2086 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087 else
2088 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089 break;
2090 case 1:
2091 if (insn & (1 << 21))
2092 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093 else
2094 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095 break;
2096 case 2:
2097 if (insn & (1 << 21))
2098 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099 else
2100 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101 break;
2102 case 3:
2103 return 1;
2105 gen_op_iwmmxt_movq_wRn_M0(wrd);
2106 gen_op_iwmmxt_set_mup();
2107 gen_op_iwmmxt_set_cup();
2108 break;
2109 case 0x00e: case 0x20e: case 0x40e: case 0x60e: /* WUNPCKEL */
2110 case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111 wrd = (insn >> 12) & 0xf;
2112 rd0 = (insn >> 16) & 0xf;
2113 gen_op_iwmmxt_movq_M0_wRn(rd0);
2114 switch ((insn >> 22) & 3) {
2115 case 0:
2116 if (insn & (1 << 21))
2117 gen_op_iwmmxt_unpacklsb_M0();
2118 else
2119 gen_op_iwmmxt_unpacklub_M0();
2120 break;
2121 case 1:
2122 if (insn & (1 << 21))
2123 gen_op_iwmmxt_unpacklsw_M0();
2124 else
2125 gen_op_iwmmxt_unpackluw_M0();
2126 break;
2127 case 2:
2128 if (insn & (1 << 21))
2129 gen_op_iwmmxt_unpacklsl_M0();
2130 else
2131 gen_op_iwmmxt_unpacklul_M0();
2132 break;
2133 case 3:
2134 return 1;
2136 gen_op_iwmmxt_movq_wRn_M0(wrd);
2137 gen_op_iwmmxt_set_mup();
2138 gen_op_iwmmxt_set_cup();
2139 break;
2140 case 0x00c: case 0x20c: case 0x40c: case 0x60c: /* WUNPCKEH */
2141 case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142 wrd = (insn >> 12) & 0xf;
2143 rd0 = (insn >> 16) & 0xf;
2144 gen_op_iwmmxt_movq_M0_wRn(rd0);
2145 switch ((insn >> 22) & 3) {
2146 case 0:
2147 if (insn & (1 << 21))
2148 gen_op_iwmmxt_unpackhsb_M0();
2149 else
2150 gen_op_iwmmxt_unpackhub_M0();
2151 break;
2152 case 1:
2153 if (insn & (1 << 21))
2154 gen_op_iwmmxt_unpackhsw_M0();
2155 else
2156 gen_op_iwmmxt_unpackhuw_M0();
2157 break;
2158 case 2:
2159 if (insn & (1 << 21))
2160 gen_op_iwmmxt_unpackhsl_M0();
2161 else
2162 gen_op_iwmmxt_unpackhul_M0();
2163 break;
2164 case 3:
2165 return 1;
2167 gen_op_iwmmxt_movq_wRn_M0(wrd);
2168 gen_op_iwmmxt_set_mup();
2169 gen_op_iwmmxt_set_cup();
2170 break;
2171 case 0x204: case 0x604: case 0xa04: case 0xe04: /* WSRL */
2172 case 0x214: case 0x614: case 0xa14: case 0xe14:
2173 if (((insn >> 22) & 3) == 0)
2174 return 1;
2175 wrd = (insn >> 12) & 0xf;
2176 rd0 = (insn >> 16) & 0xf;
2177 gen_op_iwmmxt_movq_M0_wRn(rd0);
2178 tmp = tcg_temp_new_i32();
2179 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180 tcg_temp_free_i32(tmp);
2181 return 1;
2183 switch ((insn >> 22) & 3) {
2184 case 1:
2185 gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186 break;
2187 case 2:
2188 gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189 break;
2190 case 3:
2191 gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192 break;
2194 tcg_temp_free_i32(tmp);
2195 gen_op_iwmmxt_movq_wRn_M0(wrd);
2196 gen_op_iwmmxt_set_mup();
2197 gen_op_iwmmxt_set_cup();
2198 break;
2199 case 0x004: case 0x404: case 0x804: case 0xc04: /* WSRA */
2200 case 0x014: case 0x414: case 0x814: case 0xc14:
2201 if (((insn >> 22) & 3) == 0)
2202 return 1;
2203 wrd = (insn >> 12) & 0xf;
2204 rd0 = (insn >> 16) & 0xf;
2205 gen_op_iwmmxt_movq_M0_wRn(rd0);
2206 tmp = tcg_temp_new_i32();
2207 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208 tcg_temp_free_i32(tmp);
2209 return 1;
2211 switch ((insn >> 22) & 3) {
2212 case 1:
2213 gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214 break;
2215 case 2:
2216 gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217 break;
2218 case 3:
2219 gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220 break;
2222 tcg_temp_free_i32(tmp);
2223 gen_op_iwmmxt_movq_wRn_M0(wrd);
2224 gen_op_iwmmxt_set_mup();
2225 gen_op_iwmmxt_set_cup();
2226 break;
2227 case 0x104: case 0x504: case 0x904: case 0xd04: /* WSLL */
2228 case 0x114: case 0x514: case 0x914: case 0xd14:
2229 if (((insn >> 22) & 3) == 0)
2230 return 1;
2231 wrd = (insn >> 12) & 0xf;
2232 rd0 = (insn >> 16) & 0xf;
2233 gen_op_iwmmxt_movq_M0_wRn(rd0);
2234 tmp = tcg_temp_new_i32();
2235 if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236 tcg_temp_free_i32(tmp);
2237 return 1;
2239 switch ((insn >> 22) & 3) {
2240 case 1:
2241 gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242 break;
2243 case 2:
2244 gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245 break;
2246 case 3:
2247 gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248 break;
2250 tcg_temp_free_i32(tmp);
2251 gen_op_iwmmxt_movq_wRn_M0(wrd);
2252 gen_op_iwmmxt_set_mup();
2253 gen_op_iwmmxt_set_cup();
2254 break;
2255 case 0x304: case 0x704: case 0xb04: case 0xf04: /* WROR */
2256 case 0x314: case 0x714: case 0xb14: case 0xf14:
2257 if (((insn >> 22) & 3) == 0)
2258 return 1;
2259 wrd = (insn >> 12) & 0xf;
2260 rd0 = (insn >> 16) & 0xf;
2261 gen_op_iwmmxt_movq_M0_wRn(rd0);
2262 tmp = tcg_temp_new_i32();
2263 switch ((insn >> 22) & 3) {
2264 case 1:
2265 if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266 tcg_temp_free_i32(tmp);
2267 return 1;
2269 gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270 break;
2271 case 2:
2272 if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273 tcg_temp_free_i32(tmp);
2274 return 1;
2276 gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277 break;
2278 case 3:
2279 if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280 tcg_temp_free_i32(tmp);
2281 return 1;
2283 gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284 break;
2286 tcg_temp_free_i32(tmp);
2287 gen_op_iwmmxt_movq_wRn_M0(wrd);
2288 gen_op_iwmmxt_set_mup();
2289 gen_op_iwmmxt_set_cup();
2290 break;
2291 case 0x116: case 0x316: case 0x516: case 0x716: /* WMIN */
2292 case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293 wrd = (insn >> 12) & 0xf;
2294 rd0 = (insn >> 16) & 0xf;
2295 rd1 = (insn >> 0) & 0xf;
2296 gen_op_iwmmxt_movq_M0_wRn(rd0);
2297 switch ((insn >> 22) & 3) {
2298 case 0:
2299 if (insn & (1 << 21))
2300 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301 else
2302 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303 break;
2304 case 1:
2305 if (insn & (1 << 21))
2306 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307 else
2308 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309 break;
2310 case 2:
2311 if (insn & (1 << 21))
2312 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313 else
2314 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315 break;
2316 case 3:
2317 return 1;
2319 gen_op_iwmmxt_movq_wRn_M0(wrd);
2320 gen_op_iwmmxt_set_mup();
2321 break;
2322 case 0x016: case 0x216: case 0x416: case 0x616: /* WMAX */
2323 case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324 wrd = (insn >> 12) & 0xf;
2325 rd0 = (insn >> 16) & 0xf;
2326 rd1 = (insn >> 0) & 0xf;
2327 gen_op_iwmmxt_movq_M0_wRn(rd0);
2328 switch ((insn >> 22) & 3) {
2329 case 0:
2330 if (insn & (1 << 21))
2331 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332 else
2333 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334 break;
2335 case 1:
2336 if (insn & (1 << 21))
2337 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338 else
2339 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340 break;
2341 case 2:
2342 if (insn & (1 << 21))
2343 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344 else
2345 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346 break;
2347 case 3:
2348 return 1;
2350 gen_op_iwmmxt_movq_wRn_M0(wrd);
2351 gen_op_iwmmxt_set_mup();
2352 break;
2353 case 0x002: case 0x102: case 0x202: case 0x302: /* WALIGNI */
2354 case 0x402: case 0x502: case 0x602: case 0x702:
2355 wrd = (insn >> 12) & 0xf;
2356 rd0 = (insn >> 16) & 0xf;
2357 rd1 = (insn >> 0) & 0xf;
2358 gen_op_iwmmxt_movq_M0_wRn(rd0);
2359 tmp = tcg_const_i32((insn >> 20) & 3);
2360 iwmmxt_load_reg(cpu_V1, rd1);
2361 gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362 tcg_temp_free_i32(tmp);
2363 gen_op_iwmmxt_movq_wRn_M0(wrd);
2364 gen_op_iwmmxt_set_mup();
2365 break;
2366 case 0x01a: case 0x11a: case 0x21a: case 0x31a: /* WSUB */
2367 case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368 case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369 case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370 wrd = (insn >> 12) & 0xf;
2371 rd0 = (insn >> 16) & 0xf;
2372 rd1 = (insn >> 0) & 0xf;
2373 gen_op_iwmmxt_movq_M0_wRn(rd0);
2374 switch ((insn >> 20) & 0xf) {
2375 case 0x0:
2376 gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377 break;
2378 case 0x1:
2379 gen_op_iwmmxt_subub_M0_wRn(rd1);
2380 break;
2381 case 0x3:
2382 gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383 break;
2384 case 0x4:
2385 gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386 break;
2387 case 0x5:
2388 gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389 break;
2390 case 0x7:
2391 gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392 break;
2393 case 0x8:
2394 gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395 break;
2396 case 0x9:
2397 gen_op_iwmmxt_subul_M0_wRn(rd1);
2398 break;
2399 case 0xb:
2400 gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401 break;
2402 default:
2403 return 1;
2405 gen_op_iwmmxt_movq_wRn_M0(wrd);
2406 gen_op_iwmmxt_set_mup();
2407 gen_op_iwmmxt_set_cup();
2408 break;
2409 case 0x01e: case 0x11e: case 0x21e: case 0x31e: /* WSHUFH */
2410 case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411 case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412 case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413 wrd = (insn >> 12) & 0xf;
2414 rd0 = (insn >> 16) & 0xf;
2415 gen_op_iwmmxt_movq_M0_wRn(rd0);
2416 tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417 gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418 tcg_temp_free_i32(tmp);
2419 gen_op_iwmmxt_movq_wRn_M0(wrd);
2420 gen_op_iwmmxt_set_mup();
2421 gen_op_iwmmxt_set_cup();
2422 break;
2423 case 0x018: case 0x118: case 0x218: case 0x318: /* WADD */
2424 case 0x418: case 0x518: case 0x618: case 0x718:
2425 case 0x818: case 0x918: case 0xa18: case 0xb18:
2426 case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427 wrd = (insn >> 12) & 0xf;
2428 rd0 = (insn >> 16) & 0xf;
2429 rd1 = (insn >> 0) & 0xf;
2430 gen_op_iwmmxt_movq_M0_wRn(rd0);
2431 switch ((insn >> 20) & 0xf) {
2432 case 0x0:
2433 gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434 break;
2435 case 0x1:
2436 gen_op_iwmmxt_addub_M0_wRn(rd1);
2437 break;
2438 case 0x3:
2439 gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440 break;
2441 case 0x4:
2442 gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443 break;
2444 case 0x5:
2445 gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446 break;
2447 case 0x7:
2448 gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449 break;
2450 case 0x8:
2451 gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452 break;
2453 case 0x9:
2454 gen_op_iwmmxt_addul_M0_wRn(rd1);
2455 break;
2456 case 0xb:
2457 gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458 break;
2459 default:
2460 return 1;
2462 gen_op_iwmmxt_movq_wRn_M0(wrd);
2463 gen_op_iwmmxt_set_mup();
2464 gen_op_iwmmxt_set_cup();
2465 break;
2466 case 0x008: case 0x108: case 0x208: case 0x308: /* WPACK */
2467 case 0x408: case 0x508: case 0x608: case 0x708:
2468 case 0x808: case 0x908: case 0xa08: case 0xb08:
2469 case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470 if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471 return 1;
2472 wrd = (insn >> 12) & 0xf;
2473 rd0 = (insn >> 16) & 0xf;
2474 rd1 = (insn >> 0) & 0xf;
2475 gen_op_iwmmxt_movq_M0_wRn(rd0);
2476 switch ((insn >> 22) & 3) {
2477 case 1:
2478 if (insn & (1 << 21))
2479 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480 else
2481 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482 break;
2483 case 2:
2484 if (insn & (1 << 21))
2485 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486 else
2487 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488 break;
2489 case 3:
2490 if (insn & (1 << 21))
2491 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492 else
2493 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494 break;
2496 gen_op_iwmmxt_movq_wRn_M0(wrd);
2497 gen_op_iwmmxt_set_mup();
2498 gen_op_iwmmxt_set_cup();
2499 break;
2500 case 0x201: case 0x203: case 0x205: case 0x207:
2501 case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502 case 0x211: case 0x213: case 0x215: case 0x217:
2503 case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504 wrd = (insn >> 5) & 0xf;
2505 rd0 = (insn >> 12) & 0xf;
2506 rd1 = (insn >> 0) & 0xf;
2507 if (rd0 == 0xf || rd1 == 0xf)
2508 return 1;
2509 gen_op_iwmmxt_movq_M0_wRn(wrd);
2510 tmp = load_reg(s, rd0);
2511 tmp2 = load_reg(s, rd1);
2512 switch ((insn >> 16) & 0xf) {
2513 case 0x0: /* TMIA */
2514 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515 break;
2516 case 0x8: /* TMIAPH */
2517 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518 break;
2519 case 0xc: case 0xd: case 0xe: case 0xf: /* TMIAxy */
2520 if (insn & (1 << 16))
2521 tcg_gen_shri_i32(tmp, tmp, 16);
2522 if (insn & (1 << 17))
2523 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525 break;
2526 default:
2527 tcg_temp_free_i32(tmp2);
2528 tcg_temp_free_i32(tmp);
2529 return 1;
2531 tcg_temp_free_i32(tmp2);
2532 tcg_temp_free_i32(tmp);
2533 gen_op_iwmmxt_movq_wRn_M0(wrd);
2534 gen_op_iwmmxt_set_mup();
2535 break;
2536 default:
2537 return 1;
2540 return 0;
2543 /* Disassemble an XScale DSP instruction. Returns nonzero if an error occurred
2544    (i.e. an undefined instruction). */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2547 int acc, rd0, rd1, rdhi, rdlo;
2548 TCGv_i32 tmp, tmp2;
2550 if ((insn & 0x0ff00f10) == 0x0e200010) {
2551 /* Multiply with Internal Accumulate Format */
2552 rd0 = (insn >> 12) & 0xf;
2553 rd1 = insn & 0xf;
2554 acc = (insn >> 5) & 7;
2556 if (acc != 0)
2557 return 1;
2559 tmp = load_reg(s, rd0);
2560 tmp2 = load_reg(s, rd1);
2561 switch ((insn >> 16) & 0xf) {
2562 case 0x0: /* MIA */
2563 gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564 break;
2565 case 0x8: /* MIAPH */
2566 gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567 break;
2568 case 0xc: /* MIABB */
2569 case 0xd: /* MIABT */
2570 case 0xe: /* MIATB */
2571 case 0xf: /* MIATT */
2572 if (insn & (1 << 16))
2573 tcg_gen_shri_i32(tmp, tmp, 16);
2574 if (insn & (1 << 17))
2575 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576 gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577 break;
2578 default:
2579 return 1;
2581 tcg_temp_free_i32(tmp2);
2582 tcg_temp_free_i32(tmp);
2584 gen_op_iwmmxt_movq_wRn_M0(acc);
2585 return 0;
2588 if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589 /* Internal Accumulator Access Format */
2590 rdhi = (insn >> 16) & 0xf;
2591 rdlo = (insn >> 12) & 0xf;
2592 acc = insn & 7;
2594 if (acc != 0)
2595 return 1;
2597 if (insn & ARM_CP_RW_BIT) { /* MRA */
2598 iwmmxt_load_reg(cpu_V0, acc);
2599 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2601 tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602 } else { /* MAR */
2603 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604 iwmmxt_store_reg(cpu_V0, acc);
2606 return 0;
2609 return 1;
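/*
 * Note that acc0 above is the 40-bit XScale accumulator: the MIA* forms
 * accumulate into it via the shared iwmmxt multiply-add helpers, and MRA
 * masks the upper word down to 8 bits ((1 << (40 - 32)) - 1) so only 40
 * bits are ever observable.  Only acc0 is implemented; acc1-acc7 UNDEF.
 */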
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614 if (dc_isar_feature(aa32_simd_r32, s)) { \
2615 reg = (((insn) >> (bigbit)) & 0x0f) \
2616 | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617 } else { \
2618 if (insn & (1 << (smallbit))) \
2619 return 1; \
2620 reg = ((insn) >> (bigbit)) & 0x0f; \
2621 }} while (0)
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16, 7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn, 0, 5)
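/*
 * For example, VFP_DREG_D() assembles the destination double register from
 * the 4-bit Vd field at insn[15:12] plus the D bit at insn[22]: with 32
 * D registers the result is (D << 4) | Vd, i.e. one of d0..d31.  Without
 * aa32_simd_r32 only d0..d15 exist, so a set D bit makes the caller
 * return 1 (UNDEF).
 */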
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2629 TCGv_i32 tmp = tcg_temp_new_i32();
2630 tcg_gen_ext16u_i32(var, var);
2631 tcg_gen_shli_i32(tmp, var, 16);
2632 tcg_gen_or_i32(var, var, tmp);
2633 tcg_temp_free_i32(tmp);
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2638 TCGv_i32 tmp = tcg_temp_new_i32();
2639 tcg_gen_andi_i32(var, var, 0xffff0000);
2640 tcg_gen_shri_i32(tmp, var, 16);
2641 tcg_gen_or_i32(var, var, tmp);
2642 tcg_temp_free_i32(tmp);
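/*
 * These duplicate one 16-bit lane across a 32-bit value: e.g. for an input
 * of 0xAAAABBBB, gen_neon_dup_low16() produces 0xBBBBBBBB and
 * gen_neon_dup_high16() produces 0xAAAAAAAA.  They are used below to
 * broadcast a 16-bit scalar operand to both halves of a word.
 */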
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2647 #ifndef CONFIG_USER_ONLY
2648 return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649 ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651 return true;
2652 #endif
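/*
 * Roughly: direct TB chaining via goto_tb is only used while the branch
 * destination stays on the same guest page as the current TB (its start
 * page or the page of the current insn), so that page-level TB
 * invalidation also covers the direct links; other branches fall back to
 * the run-time TB lookup in gen_goto_ptr() below.
 */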
2655 static void gen_goto_ptr(void)
2657 tcg_gen_lookup_and_goto_ptr();
2660 /* This will end the TB but doesn't guarantee we'll return to
2661 * cpu_loop_exec. Any live exit_requests will be processed as we
2662 * enter the next TB.
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2666 if (use_goto_tb(s, dest)) {
2667 tcg_gen_goto_tb(n);
2668 gen_set_pc_im(s, dest);
2669 tcg_gen_exit_tb(s->base.tb, n);
2670 } else {
2671 gen_set_pc_im(s, dest);
2672 gen_goto_ptr();
2674 s->base.is_jmp = DISAS_NORETURN;
2677 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2679 if (unlikely(is_singlestepping(s))) {
2680 /* An indirect jump so that we still trigger the debug exception. */
2681 gen_set_pc_im(s, dest);
2682 s->base.is_jmp = DISAS_JUMP;
2683 } else {
2684 gen_goto_tb(s, 0, dest);
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2690 if (x)
2691 tcg_gen_sari_i32(t0, t0, 16);
2692 else
2693 gen_sxth(t0);
2694 if (y)
2695 tcg_gen_sari_i32(t1, t1, 16);
2696 else
2697 gen_sxth(t1);
2698 tcg_gen_mul_i32(t0, t0, t1);
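/*
 * gen_mulxy() implements the signed 16 x 16 -> 32 multiply used by the
 * SMULxy/SMLAxy family: x and y select the top (1) or bottom (0) halfword
 * of t0 and t1 respectively, e.g. x == 1, y == 0 multiplies the high half
 * of t0 by the sign-extended low half of t1.
 */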
2701 /* Return the mask of PSR bits set by a MSR instruction. */
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2704 uint32_t mask = 0;
2706 if (flags & (1 << 0)) {
2707 mask |= 0xff;
2709 if (flags & (1 << 1)) {
2710 mask |= 0xff00;
2712 if (flags & (1 << 2)) {
2713 mask |= 0xff0000;
2715 if (flags & (1 << 3)) {
2716 mask |= 0xff000000;
2719 /* Mask out undefined and reserved bits. */
2720 mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2722 /* Mask out execution state. */
2723 if (!spsr) {
2724 mask &= ~CPSR_EXEC;
2727 /* Mask out privileged bits. */
2728 if (IS_USER(s)) {
2729 mask &= CPSR_USER;
2731 return mask;
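/*
 * The four flag bits correspond to the MSR field mask c/x/s/f, one byte
 * each from bit 0 upwards: e.g. flags == 0b1001 ("fc") selects 0xff0000ff
 * before the validity, execution-state and privilege masking above is
 * applied.
 */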
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2737 TCGv_i32 tmp;
2738 if (spsr) {
2739 /* ??? This is also undefined in system mode. */
2740 if (IS_USER(s))
2741 return 1;
2743 tmp = load_cpu_field(spsr);
2744 tcg_gen_andi_i32(tmp, tmp, ~mask);
2745 tcg_gen_andi_i32(t0, t0, mask);
2746 tcg_gen_or_i32(tmp, tmp, t0);
2747 store_cpu_field(tmp, spsr);
2748 } else {
2749 gen_set_cpsr(t0, mask);
2751 tcg_temp_free_i32(t0);
2752 gen_lookup_tb(s);
2753 return 0;
2756 /* Returns nonzero if access to the PSR is not permitted. */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2759 TCGv_i32 tmp;
2760 tmp = tcg_temp_new_i32();
2761 tcg_gen_movi_i32(tmp, val);
2762 return gen_set_psr(s, mask, spsr, tmp);
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766 int *tgtmode, int *regno)
2768 /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769 * the target mode and register number, and identify the various
2770 * unpredictable cases.
2771 * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772 * + executed in user mode
2773 * + using R15 as the src/dest register
2774 * + accessing an unimplemented register
2775 * + accessing a register that's inaccessible at current PL/security state*
2776 * + accessing a register that you could access with a different insn
2777 * We choose to UNDEF in all these cases.
2778 * Since we don't know which of the various AArch32 modes we are in
2779 * we have to defer some checks to runtime.
2780 * Accesses to Monitor mode registers from Secure EL1 (which implies
2781 * that EL3 is AArch64) must trap to EL3.
2783 * If the access checks fail this function will emit code to take
2784 * an exception and return false. Otherwise it will return true,
2785 * and set *tgtmode and *regno appropriately.
2787 int exc_target = default_exception_el(s);
2789 /* These instructions are present only in ARMv8, or in ARMv7 with the
2790 * Virtualization Extensions.
2792 if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793 !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794 goto undef;
2797 if (IS_USER(s) || rn == 15) {
2798 goto undef;
2801 /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2802 * of registers into (r, sysm).
2804 if (r) {
2805 /* SPSRs for other modes */
2806 switch (sysm) {
2807 case 0xe: /* SPSR_fiq */
2808 *tgtmode = ARM_CPU_MODE_FIQ;
2809 break;
2810 case 0x10: /* SPSR_irq */
2811 *tgtmode = ARM_CPU_MODE_IRQ;
2812 break;
2813 case 0x12: /* SPSR_svc */
2814 *tgtmode = ARM_CPU_MODE_SVC;
2815 break;
2816 case 0x14: /* SPSR_abt */
2817 *tgtmode = ARM_CPU_MODE_ABT;
2818 break;
2819 case 0x16: /* SPSR_und */
2820 *tgtmode = ARM_CPU_MODE_UND;
2821 break;
2822 case 0x1c: /* SPSR_mon */
2823 *tgtmode = ARM_CPU_MODE_MON;
2824 break;
2825 case 0x1e: /* SPSR_hyp */
2826 *tgtmode = ARM_CPU_MODE_HYP;
2827 break;
2828 default: /* unallocated */
2829 goto undef;
2831 /* We arbitrarily assign SPSR a register number of 16. */
2832 *regno = 16;
2833 } else {
2834 /* general purpose registers for other modes */
2835 switch (sysm) {
2836 case 0x0 ... 0x6: /* 0b00xxx : r8_usr ... r14_usr */
2837 *tgtmode = ARM_CPU_MODE_USR;
2838 *regno = sysm + 8;
2839 break;
2840 case 0x8 ... 0xe: /* 0b01xxx : r8_fiq ... r14_fiq */
2841 *tgtmode = ARM_CPU_MODE_FIQ;
2842 *regno = sysm;
2843 break;
2844 case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845 *tgtmode = ARM_CPU_MODE_IRQ;
2846 *regno = sysm & 1 ? 13 : 14;
2847 break;
2848 case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849 *tgtmode = ARM_CPU_MODE_SVC;
2850 *regno = sysm & 1 ? 13 : 14;
2851 break;
2852 case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853 *tgtmode = ARM_CPU_MODE_ABT;
2854 *regno = sysm & 1 ? 13 : 14;
2855 break;
2856 case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857 *tgtmode = ARM_CPU_MODE_UND;
2858 *regno = sysm & 1 ? 13 : 14;
2859 break;
2860 case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861 *tgtmode = ARM_CPU_MODE_MON;
2862 *regno = sysm & 1 ? 13 : 14;
2863 break;
2864 case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865 *tgtmode = ARM_CPU_MODE_HYP;
2866 /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867 *regno = sysm & 1 ? 13 : 17;
2868 break;
2869 default: /* unallocated */
2870 goto undef;
2874 /* Catch the 'accessing inaccessible register' cases we can detect
2875 * at translate time.
2877 switch (*tgtmode) {
2878 case ARM_CPU_MODE_MON:
2879 if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880 goto undef;
2882 if (s->current_el == 1) {
2883 /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2884 * then accesses to Mon registers trap to EL3
2886 exc_target = 3;
2887 goto undef;
2889 break;
2890 case ARM_CPU_MODE_HYP:
2892 * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2893 * (and so we can forbid accesses from EL2 or below). elr_hyp
2894 * can be accessed also from Hyp mode, so forbid accesses from
2895 * EL0 or EL1.
2897 if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898 (s->current_el < 3 && *regno != 17)) {
2899 goto undef;
2901 break;
2902 default:
2903 break;
2906 return true;
2908 undef:
2909 /* If we get here then some access check did not pass */
2910 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911 syn_uncategorized(), exc_target);
2912 return false;
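/*
 * Example: "MRS r0, SPSR_fiq" reaches this decode with r == 1 and
 * sysm == 0xe, giving tgtmode ARM_CPU_MODE_FIQ and regno 16 (the arbitrary
 * number we assign to an SPSR); the mode-legality checks above then decide
 * whether the access is emitted or an exception is taken.
 */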
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2917 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918 int tgtmode = 0, regno = 0;
2920 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921 return;
2924 /* Sync state because msr_banked() can raise exceptions */
2925 gen_set_condexec(s);
2926 gen_set_pc_im(s, s->pc_curr);
2927 tcg_reg = load_reg(s, rn);
2928 tcg_tgtmode = tcg_const_i32(tgtmode);
2929 tcg_regno = tcg_const_i32(regno);
2930 gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931 tcg_temp_free_i32(tcg_tgtmode);
2932 tcg_temp_free_i32(tcg_regno);
2933 tcg_temp_free_i32(tcg_reg);
2934 s->base.is_jmp = DISAS_UPDATE;
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2939 TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940 int tgtmode = 0, regno = 0;
2942 if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943 return;
2946 /* Sync state because mrs_banked() can raise exceptions */
2947 gen_set_condexec(s);
2948 gen_set_pc_im(s, s->pc_curr);
2949 tcg_reg = tcg_temp_new_i32();
2950 tcg_tgtmode = tcg_const_i32(tgtmode);
2951 tcg_regno = tcg_const_i32(regno);
2952 gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953 tcg_temp_free_i32(tcg_tgtmode);
2954 tcg_temp_free_i32(tcg_regno);
2955 store_reg(s, rn, tcg_reg);
2956 s->base.is_jmp = DISAS_UPDATE;
2959 /* Store value to PC as for an exception return (i.e. don't
2960 * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2961 * will do the masking based on the new value of the Thumb bit.
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2965 tcg_gen_mov_i32(cpu_R[15], pc);
2966 tcg_temp_free_i32(pc);
2969 /* Generate a v6 exception return. Marks both values as dead. */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2972 store_pc_exc_ret(s, pc);
2973 /* The cpsr_write_eret helper will mask the low bits of PC
2974 * appropriately depending on the new Thumb bit, so it must
2975 * be called after storing the new PC.
2977 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978 gen_io_start();
2980 gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981 tcg_temp_free_i32(cpsr);
2982 /* Must exit loop to check un-masked IRQs */
2983 s->base.is_jmp = DISAS_EXIT;
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2989 gen_rfe(s, pc, load_cpu_field(spsr));
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2996 switch (size) {
2997 case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998 case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999 case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000 default: abort();
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3006 switch (size) {
3007 case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008 case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009 case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010 default: return;
3014 static TCGv_i32 neon_load_scratch(int scratch)
3016 TCGv_i32 tmp = tcg_temp_new_i32();
3017 tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3018 return tmp;
3021 static void neon_store_scratch(int scratch, TCGv_i32 var)
3023 tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3024 tcg_temp_free_i32(var);
3027 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3029 TCGv_i32 tmp;
3030 if (size == 1) {
3031 tmp = neon_load_reg(reg & 7, reg >> 4);
3032 if (reg & 8) {
3033 gen_neon_dup_high16(tmp);
3034 } else {
3035 gen_neon_dup_low16(tmp);
3037 } else {
3038 tmp = neon_load_reg(reg & 15, reg >> 4);
3040 return tmp;
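/*
 * Here 'reg' packs both the D register and the scalar index: for a 16-bit
 * scalar, bits [2:0] give the register, bit 4 the 32-bit word and bit 3
 * the half within it (broadcast via the dup helpers above); for a 32-bit
 * scalar, bits [3:0] give the register and bit 4 the word.
 */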
3043 static int gen_neon_unzip(int rd, int rm, int size, int q)
3045 TCGv_ptr pd, pm;
3047 if (!q && size == 2) {
3048 return 1;
3050 pd = vfp_reg_ptr(true, rd);
3051 pm = vfp_reg_ptr(true, rm);
3052 if (q) {
3053 switch (size) {
3054 case 0:
3055 gen_helper_neon_qunzip8(pd, pm);
3056 break;
3057 case 1:
3058 gen_helper_neon_qunzip16(pd, pm);
3059 break;
3060 case 2:
3061 gen_helper_neon_qunzip32(pd, pm);
3062 break;
3063 default:
3064 abort();
3066 } else {
3067 switch (size) {
3068 case 0:
3069 gen_helper_neon_unzip8(pd, pm);
3070 break;
3071 case 1:
3072 gen_helper_neon_unzip16(pd, pm);
3073 break;
3074 default:
3075 abort();
3078 tcg_temp_free_ptr(pd);
3079 tcg_temp_free_ptr(pm);
3080 return 0;
3083 static int gen_neon_zip(int rd, int rm, int size, int q)
3085 TCGv_ptr pd, pm;
3087 if (!q && size == 2) {
3088 return 1;
3090 pd = vfp_reg_ptr(true, rd);
3091 pm = vfp_reg_ptr(true, rm);
3092 if (q) {
3093 switch (size) {
3094 case 0:
3095 gen_helper_neon_qzip8(pd, pm);
3096 break;
3097 case 1:
3098 gen_helper_neon_qzip16(pd, pm);
3099 break;
3100 case 2:
3101 gen_helper_neon_qzip32(pd, pm);
3102 break;
3103 default:
3104 abort();
3106 } else {
3107 switch (size) {
3108 case 0:
3109 gen_helper_neon_zip8(pd, pm);
3110 break;
3111 case 1:
3112 gen_helper_neon_zip16(pd, pm);
3113 break;
3114 default:
3115 abort();
3118 tcg_temp_free_ptr(pd);
3119 tcg_temp_free_ptr(pm);
3120 return 0;
3123 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3125 TCGv_i32 rd, tmp;
3127 rd = tcg_temp_new_i32();
3128 tmp = tcg_temp_new_i32();
3130 tcg_gen_shli_i32(rd, t0, 8);
3131 tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3132 tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3133 tcg_gen_or_i32(rd, rd, tmp);
3135 tcg_gen_shri_i32(t1, t1, 8);
3136 tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3137 tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3138 tcg_gen_or_i32(t1, t1, tmp);
3139 tcg_gen_mov_i32(t0, rd);
3141 tcg_temp_free_i32(tmp);
3142 tcg_temp_free_i32(rd);
3145 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3147 TCGv_i32 rd, tmp;
3149 rd = tcg_temp_new_i32();
3150 tmp = tcg_temp_new_i32();
3152 tcg_gen_shli_i32(rd, t0, 16);
3153 tcg_gen_andi_i32(tmp, t1, 0xffff);
3154 tcg_gen_or_i32(rd, rd, tmp);
3155 tcg_gen_shri_i32(t1, t1, 16);
3156 tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3157 tcg_gen_or_i32(t1, t1, tmp);
3158 tcg_gen_mov_i32(t0, rd);
3160 tcg_temp_free_i32(tmp);
3161 tcg_temp_free_i32(rd);
3164 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3166 switch (size) {
3167 case 0: gen_helper_neon_narrow_u8(dest, src); break;
3168 case 1: gen_helper_neon_narrow_u16(dest, src); break;
3169 case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3170 default: abort();
3174 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3176 switch (size) {
3177 case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3178 case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3179 case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3180 default: abort();
3184 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3186 switch (size) {
3187 case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3188 case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3189 case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3190 default: abort();
3194 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3196 switch (size) {
3197 case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3198 case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3199 case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3200 default: abort();
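/*
 * The narrowing helpers above take a 64-bit vector of double-width
 * elements down to 32 bits of half-width elements: plain truncation,
 * signed or unsigned saturation (the VQMOVN flavours), and "unarrow",
 * which saturates signed input to the unsigned range (VQMOVUN-style).
 */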
3204 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3206 if (u) {
3207 switch (size) {
3208 case 0: gen_helper_neon_widen_u8(dest, src); break;
3209 case 1: gen_helper_neon_widen_u16(dest, src); break;
3210 case 2: tcg_gen_extu_i32_i64(dest, src); break;
3211 default: abort();
3213 } else {
3214 switch (size) {
3215 case 0: gen_helper_neon_widen_s8(dest, src); break;
3216 case 1: gen_helper_neon_widen_s16(dest, src); break;
3217 case 2: tcg_gen_ext_i32_i64(dest, src); break;
3218 default: abort();
3221 tcg_temp_free_i32(src);
3224 static inline void gen_neon_addl(int size)
3226 switch (size) {
3227 case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3228 case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3229 case 2: tcg_gen_add_i64(CPU_V001); break;
3230 default: abort();
3234 static inline void gen_neon_negl(TCGv_i64 var, int size)
3236 switch (size) {
3237 case 0: gen_helper_neon_negl_u16(var, var); break;
3238 case 1: gen_helper_neon_negl_u32(var, var); break;
3239 case 2:
3240 tcg_gen_neg_i64(var, var);
3241 break;
3242 default: abort();
3246 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3248 switch (size) {
3249 case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3250 case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3251 default: abort();
3255 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3256 int size, int u)
3258 TCGv_i64 tmp;
3260 switch ((size << 1) | u) {
3261 case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3262 case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3263 case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3264 case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3265 case 4:
3266 tmp = gen_muls_i64_i32(a, b);
3267 tcg_gen_mov_i64(dest, tmp);
3268 tcg_temp_free_i64(tmp);
3269 break;
3270 case 5:
3271 tmp = gen_mulu_i64_i32(a, b);
3272 tcg_gen_mov_i64(dest, tmp);
3273 tcg_temp_free_i64(tmp);
3274 break;
3275 default: abort();
3278 /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3279 Don't forget to clean them now. */
3280 if (size < 2) {
3281 tcg_temp_free_i32(a);
3282 tcg_temp_free_i32(b);
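/*
 * The (size << 1) | u switch above selects the widening multiply: e.g.
 * size == 1, u == 0 is case 2, a signed 16 x 16 -> 32 multiply per
 * element, while size == 2 uses a full 32 x 32 -> 64 multiply built from
 * gen_muls_i64_i32 / gen_mulu_i64_i32.
 */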
3286 static void gen_neon_narrow_op(int op, int u, int size,
3287 TCGv_i32 dest, TCGv_i64 src)
3289 if (op) {
3290 if (u) {
3291 gen_neon_unarrow_sats(size, dest, src);
3292 } else {
3293 gen_neon_narrow(size, dest, src);
3295 } else {
3296 if (u) {
3297 gen_neon_narrow_satu(size, dest, src);
3298 } else {
3299 gen_neon_narrow_sats(size, dest, src);
3304 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3305 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3306 * table A7-13.
3308 #define NEON_2RM_VREV64 0
3309 #define NEON_2RM_VREV32 1
3310 #define NEON_2RM_VREV16 2
3311 #define NEON_2RM_VPADDL 4
3312 #define NEON_2RM_VPADDL_U 5
3313 #define NEON_2RM_AESE 6 /* Includes AESD */
3314 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3315 #define NEON_2RM_VCLS 8
3316 #define NEON_2RM_VCLZ 9
3317 #define NEON_2RM_VCNT 10
3318 #define NEON_2RM_VMVN 11
3319 #define NEON_2RM_VPADAL 12
3320 #define NEON_2RM_VPADAL_U 13
3321 #define NEON_2RM_VQABS 14
3322 #define NEON_2RM_VQNEG 15
3323 #define NEON_2RM_VCGT0 16
3324 #define NEON_2RM_VCGE0 17
3325 #define NEON_2RM_VCEQ0 18
3326 #define NEON_2RM_VCLE0 19
3327 #define NEON_2RM_VCLT0 20
3328 #define NEON_2RM_SHA1H 21
3329 #define NEON_2RM_VABS 22
3330 #define NEON_2RM_VNEG 23
3331 #define NEON_2RM_VCGT0_F 24
3332 #define NEON_2RM_VCGE0_F 25
3333 #define NEON_2RM_VCEQ0_F 26
3334 #define NEON_2RM_VCLE0_F 27
3335 #define NEON_2RM_VCLT0_F 28
3336 #define NEON_2RM_VABS_F 30
3337 #define NEON_2RM_VNEG_F 31
3338 #define NEON_2RM_VSWP 32
3339 #define NEON_2RM_VTRN 33
3340 #define NEON_2RM_VUZP 34
3341 #define NEON_2RM_VZIP 35
3342 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3343 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3344 #define NEON_2RM_VSHLL 38
3345 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3346 #define NEON_2RM_VRINTN 40
3347 #define NEON_2RM_VRINTX 41
3348 #define NEON_2RM_VRINTA 42
3349 #define NEON_2RM_VRINTZ 43
3350 #define NEON_2RM_VCVT_F16_F32 44
3351 #define NEON_2RM_VRINTM 45
3352 #define NEON_2RM_VCVT_F32_F16 46
3353 #define NEON_2RM_VRINTP 47
3354 #define NEON_2RM_VCVTAU 48
3355 #define NEON_2RM_VCVTAS 49
3356 #define NEON_2RM_VCVTNU 50
3357 #define NEON_2RM_VCVTNS 51
3358 #define NEON_2RM_VCVTPU 52
3359 #define NEON_2RM_VCVTPS 53
3360 #define NEON_2RM_VCVTMU 54
3361 #define NEON_2RM_VCVTMS 55
3362 #define NEON_2RM_VRECPE 56
3363 #define NEON_2RM_VRSQRTE 57
3364 #define NEON_2RM_VRECPE_F 58
3365 #define NEON_2RM_VRSQRTE_F 59
3366 #define NEON_2RM_VCVT_FS 60
3367 #define NEON_2RM_VCVT_FU 61
3368 #define NEON_2RM_VCVT_SF 62
3369 #define NEON_2RM_VCVT_UF 63
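/*
 * For instance NEON_2RM_VSHLL == 38 == 0b100110, i.e. insn bits
 * [17:16] = 0b10 and bits [10:7] = 0b0110 in the 2-reg-misc encoding
 * described above.
 */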
3371 static bool neon_2rm_is_v8_op(int op)
3373 /* Return true if this neon 2reg-misc op is ARMv8 and up */
3374 switch (op) {
3375 case NEON_2RM_VRINTN:
3376 case NEON_2RM_VRINTA:
3377 case NEON_2RM_VRINTM:
3378 case NEON_2RM_VRINTP:
3379 case NEON_2RM_VRINTZ:
3380 case NEON_2RM_VRINTX:
3381 case NEON_2RM_VCVTAU:
3382 case NEON_2RM_VCVTAS:
3383 case NEON_2RM_VCVTNU:
3384 case NEON_2RM_VCVTNS:
3385 case NEON_2RM_VCVTPU:
3386 case NEON_2RM_VCVTPS:
3387 case NEON_2RM_VCVTMU:
3388 case NEON_2RM_VCVTMS:
3389 return true;
3390 default:
3391 return false;
3395 /* Each entry in this array has bit n set if the insn allows
3396 * size value n (otherwise it will UNDEF). Since unallocated
3397 * op values will have no bits set they always UNDEF.
3399 static const uint8_t neon_2rm_sizes[] = {
3400 [NEON_2RM_VREV64] = 0x7,
3401 [NEON_2RM_VREV32] = 0x3,
3402 [NEON_2RM_VREV16] = 0x1,
3403 [NEON_2RM_VPADDL] = 0x7,
3404 [NEON_2RM_VPADDL_U] = 0x7,
3405 [NEON_2RM_AESE] = 0x1,
3406 [NEON_2RM_AESMC] = 0x1,
3407 [NEON_2RM_VCLS] = 0x7,
3408 [NEON_2RM_VCLZ] = 0x7,
3409 [NEON_2RM_VCNT] = 0x1,
3410 [NEON_2RM_VMVN] = 0x1,
3411 [NEON_2RM_VPADAL] = 0x7,
3412 [NEON_2RM_VPADAL_U] = 0x7,
3413 [NEON_2RM_VQABS] = 0x7,
3414 [NEON_2RM_VQNEG] = 0x7,
3415 [NEON_2RM_VCGT0] = 0x7,
3416 [NEON_2RM_VCGE0] = 0x7,
3417 [NEON_2RM_VCEQ0] = 0x7,
3418 [NEON_2RM_VCLE0] = 0x7,
3419 [NEON_2RM_VCLT0] = 0x7,
3420 [NEON_2RM_SHA1H] = 0x4,
3421 [NEON_2RM_VABS] = 0x7,
3422 [NEON_2RM_VNEG] = 0x7,
3423 [NEON_2RM_VCGT0_F] = 0x4,
3424 [NEON_2RM_VCGE0_F] = 0x4,
3425 [NEON_2RM_VCEQ0_F] = 0x4,
3426 [NEON_2RM_VCLE0_F] = 0x4,
3427 [NEON_2RM_VCLT0_F] = 0x4,
3428 [NEON_2RM_VABS_F] = 0x4,
3429 [NEON_2RM_VNEG_F] = 0x4,
3430 [NEON_2RM_VSWP] = 0x1,
3431 [NEON_2RM_VTRN] = 0x7,
3432 [NEON_2RM_VUZP] = 0x7,
3433 [NEON_2RM_VZIP] = 0x7,
3434 [NEON_2RM_VMOVN] = 0x7,
3435 [NEON_2RM_VQMOVN] = 0x7,
3436 [NEON_2RM_VSHLL] = 0x7,
3437 [NEON_2RM_SHA1SU1] = 0x4,
3438 [NEON_2RM_VRINTN] = 0x4,
3439 [NEON_2RM_VRINTX] = 0x4,
3440 [NEON_2RM_VRINTA] = 0x4,
3441 [NEON_2RM_VRINTZ] = 0x4,
3442 [NEON_2RM_VCVT_F16_F32] = 0x2,
3443 [NEON_2RM_VRINTM] = 0x4,
3444 [NEON_2RM_VCVT_F32_F16] = 0x2,
3445 [NEON_2RM_VRINTP] = 0x4,
3446 [NEON_2RM_VCVTAU] = 0x4,
3447 [NEON_2RM_VCVTAS] = 0x4,
3448 [NEON_2RM_VCVTNU] = 0x4,
3449 [NEON_2RM_VCVTNS] = 0x4,
3450 [NEON_2RM_VCVTPU] = 0x4,
3451 [NEON_2RM_VCVTPS] = 0x4,
3452 [NEON_2RM_VCVTMU] = 0x4,
3453 [NEON_2RM_VCVTMS] = 0x4,
3454 [NEON_2RM_VRECPE] = 0x4,
3455 [NEON_2RM_VRSQRTE] = 0x4,
3456 [NEON_2RM_VRECPE_F] = 0x4,
3457 [NEON_2RM_VRSQRTE_F] = 0x4,
3458 [NEON_2RM_VCVT_FS] = 0x4,
3459 [NEON_2RM_VCVT_FU] = 0x4,
3460 [NEON_2RM_VCVT_SF] = 0x4,
3461 [NEON_2RM_VCVT_UF] = 0x4,
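/*
 * Example: [NEON_2RM_VREV32] = 0x3 allows size 0 and 1 (byte and halfword
 * elements) and UNDEFs for size 2, while entries of 0x4 (e.g. the
 * VRINT/VCVT group) accept only size 2, i.e. 32-bit elements.
 */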
3464 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3465 uint32_t opr_sz, uint32_t max_sz,
3466 gen_helper_gvec_3_ptr *fn)
3468 TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3470 tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3471 tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3472 opr_sz, max_sz, 0, fn);
3473 tcg_temp_free_ptr(qc_ptr);
3476 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3477 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3479 static gen_helper_gvec_3_ptr * const fns[2] = {
3480 gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3482 tcg_debug_assert(vece >= 1 && vece <= 2);
3483 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3486 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3487 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3489 static gen_helper_gvec_3_ptr * const fns[2] = {
3490 gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3492 tcg_debug_assert(vece >= 1 && vece <= 2);
3493 gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3496 #define GEN_CMP0(NAME, COND) \
3497 static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
3499 tcg_gen_setcondi_i32(COND, d, a, 0); \
3500 tcg_gen_neg_i32(d, d); \
3502 static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
3504 tcg_gen_setcondi_i64(COND, d, a, 0); \
3505 tcg_gen_neg_i64(d, d); \
3507 static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3509 TCGv_vec zero = tcg_const_zeros_vec_matching(d); \
3510 tcg_gen_cmp_vec(COND, vece, d, a, zero); \
3511 tcg_temp_free_vec(zero); \
3513 void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
3514 uint32_t opr_sz, uint32_t max_sz) \
3516 const GVecGen2 op[4] = { \
3517 { .fno = gen_helper_gvec_##NAME##0_b, \
3518 .fniv = gen_##NAME##0_vec, \
3519 .opt_opc = vecop_list_cmp, \
3520 .vece = MO_8 }, \
3521 { .fno = gen_helper_gvec_##NAME##0_h, \
3522 .fniv = gen_##NAME##0_vec, \
3523 .opt_opc = vecop_list_cmp, \
3524 .vece = MO_16 }, \
3525 { .fni4 = gen_##NAME##0_i32, \
3526 .fniv = gen_##NAME##0_vec, \
3527 .opt_opc = vecop_list_cmp, \
3528 .vece = MO_32 }, \
3529 { .fni8 = gen_##NAME##0_i64, \
3530 .fniv = gen_##NAME##0_vec, \
3531 .opt_opc = vecop_list_cmp, \
3532 .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
3533 .vece = MO_64 }, \
3534 }; \
3535 tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
3538 static const TCGOpcode vecop_list_cmp[] = {
3539 INDEX_op_cmp_vec, 0
3542 GEN_CMP0(ceq, TCG_COND_EQ)
3543 GEN_CMP0(cle, TCG_COND_LE)
3544 GEN_CMP0(cge, TCG_COND_GE)
3545 GEN_CMP0(clt, TCG_COND_LT)
3546 GEN_CMP0(cgt, TCG_COND_GT)
3548 #undef GEN_CMP0
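/*
 * Each GEN_CMP0 expansion provides i32, i64 and vector versions of a
 * compare-against-zero that yields an all-ones mask when the condition
 * holds: setcond produces 0 or 1 and the negation turns that into 0 or -1,
 * matching the results of the out-of-line gen_helper_gvec_*0_* functions.
 */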
3550 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3552 tcg_gen_vec_sar8i_i64(a, a, shift);
3553 tcg_gen_vec_add8_i64(d, d, a);
3556 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3558 tcg_gen_vec_sar16i_i64(a, a, shift);
3559 tcg_gen_vec_add16_i64(d, d, a);
3562 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3564 tcg_gen_sari_i32(a, a, shift);
3565 tcg_gen_add_i32(d, d, a);
3568 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3570 tcg_gen_sari_i64(a, a, shift);
3571 tcg_gen_add_i64(d, d, a);
3574 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3576 tcg_gen_sari_vec(vece, a, a, sh);
3577 tcg_gen_add_vec(vece, d, d, a);
3580 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3581 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3583 static const TCGOpcode vecop_list[] = {
3584 INDEX_op_sari_vec, INDEX_op_add_vec, 0
3586 static const GVecGen2i ops[4] = {
3587 { .fni8 = gen_ssra8_i64,
3588 .fniv = gen_ssra_vec,
3589 .fno = gen_helper_gvec_ssra_b,
3590 .load_dest = true,
3591 .opt_opc = vecop_list,
3592 .vece = MO_8 },
3593 { .fni8 = gen_ssra16_i64,
3594 .fniv = gen_ssra_vec,
3595 .fno = gen_helper_gvec_ssra_h,
3596 .load_dest = true,
3597 .opt_opc = vecop_list,
3598 .vece = MO_16 },
3599 { .fni4 = gen_ssra32_i32,
3600 .fniv = gen_ssra_vec,
3601 .fno = gen_helper_gvec_ssra_s,
3602 .load_dest = true,
3603 .opt_opc = vecop_list,
3604 .vece = MO_32 },
3605 { .fni8 = gen_ssra64_i64,
3606 .fniv = gen_ssra_vec,
3607       .fno = gen_helper_gvec_ssra_d,
3608 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3609 .opt_opc = vecop_list,
3610 .load_dest = true,
3611 .vece = MO_64 },
3614 /* tszimm encoding produces immediates in the range [1..esize]. */
3615 tcg_debug_assert(shift > 0);
3616 tcg_debug_assert(shift <= (8 << vece));
3619 * Shifts larger than the element size are architecturally valid.
3620      * A signed shift of that size produces all sign bits, so clamp to esize - 1.
3622 shift = MIN(shift, (8 << vece) - 1);
3623 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3626 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3628 tcg_gen_vec_shr8i_i64(a, a, shift);
3629 tcg_gen_vec_add8_i64(d, d, a);
3632 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3634 tcg_gen_vec_shr16i_i64(a, a, shift);
3635 tcg_gen_vec_add16_i64(d, d, a);
3638 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3640 tcg_gen_shri_i32(a, a, shift);
3641 tcg_gen_add_i32(d, d, a);
3644 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3646 tcg_gen_shri_i64(a, a, shift);
3647 tcg_gen_add_i64(d, d, a);
3650 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3652 tcg_gen_shri_vec(vece, a, a, sh);
3653 tcg_gen_add_vec(vece, d, d, a);
3656 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3657 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3659 static const TCGOpcode vecop_list[] = {
3660 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3662 static const GVecGen2i ops[4] = {
3663 { .fni8 = gen_usra8_i64,
3664 .fniv = gen_usra_vec,
3665 .fno = gen_helper_gvec_usra_b,
3666 .load_dest = true,
3667 .opt_opc = vecop_list,
3668 .vece = MO_8, },
3669 { .fni8 = gen_usra16_i64,
3670 .fniv = gen_usra_vec,
3671 .fno = gen_helper_gvec_usra_h,
3672 .load_dest = true,
3673 .opt_opc = vecop_list,
3674 .vece = MO_16, },
3675 { .fni4 = gen_usra32_i32,
3676 .fniv = gen_usra_vec,
3677 .fno = gen_helper_gvec_usra_s,
3678 .load_dest = true,
3679 .opt_opc = vecop_list,
3680 .vece = MO_32, },
3681 { .fni8 = gen_usra64_i64,
3682 .fniv = gen_usra_vec,
3683 .fno = gen_helper_gvec_usra_d,
3684 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3685 .load_dest = true,
3686 .opt_opc = vecop_list,
3687 .vece = MO_64, },
3690 /* tszimm encoding produces immediates in the range [1..esize]. */
3691 tcg_debug_assert(shift > 0);
3692 tcg_debug_assert(shift <= (8 << vece));
3695 * Shifts larger than the element size are architecturally valid.
3696      * An unsigned shift of that size produces all zeros, so the accumulate is a nop.
3698 if (shift < (8 << vece)) {
3699 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3700 } else {
3701 /* Nop, but we do need to clear the tail. */
3702 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3707 * Shift one less than the requested amount, and the low bit is
3708 * the rounding bit. For the 8 and 16-bit operations, because we
3709 * mask the low bit, we can perform a normal integer shift instead
3710 * of a vector shift.
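 * In other words d = (a + (1 << (sh - 1))) >> sh without risking overflow
 * in the addition: e.g. URSHR of 6 by 2 computes a rounding bit of
 * (6 >> 1) & 1 = 1 and a shifted value of 6 >> 2 = 1, giving 2,
 * i.e. 6/4 rounded to nearest.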
3712 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3714 TCGv_i64 t = tcg_temp_new_i64();
3716 tcg_gen_shri_i64(t, a, sh - 1);
3717 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3718 tcg_gen_vec_sar8i_i64(d, a, sh);
3719 tcg_gen_vec_add8_i64(d, d, t);
3720 tcg_temp_free_i64(t);
3723 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3725 TCGv_i64 t = tcg_temp_new_i64();
3727 tcg_gen_shri_i64(t, a, sh - 1);
3728 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3729 tcg_gen_vec_sar16i_i64(d, a, sh);
3730 tcg_gen_vec_add16_i64(d, d, t);
3731 tcg_temp_free_i64(t);
3734 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3736 TCGv_i32 t = tcg_temp_new_i32();
3738 tcg_gen_extract_i32(t, a, sh - 1, 1);
3739 tcg_gen_sari_i32(d, a, sh);
3740 tcg_gen_add_i32(d, d, t);
3741 tcg_temp_free_i32(t);
3744 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3746 TCGv_i64 t = tcg_temp_new_i64();
3748 tcg_gen_extract_i64(t, a, sh - 1, 1);
3749 tcg_gen_sari_i64(d, a, sh);
3750 tcg_gen_add_i64(d, d, t);
3751 tcg_temp_free_i64(t);
3754 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3756 TCGv_vec t = tcg_temp_new_vec_matching(d);
3757 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3759 tcg_gen_shri_vec(vece, t, a, sh - 1);
3760 tcg_gen_dupi_vec(vece, ones, 1);
3761 tcg_gen_and_vec(vece, t, t, ones);
3762 tcg_gen_sari_vec(vece, d, a, sh);
3763 tcg_gen_add_vec(vece, d, d, t);
3765 tcg_temp_free_vec(t);
3766 tcg_temp_free_vec(ones);
3769 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3770 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3772 static const TCGOpcode vecop_list[] = {
3773 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3775 static const GVecGen2i ops[4] = {
3776 { .fni8 = gen_srshr8_i64,
3777 .fniv = gen_srshr_vec,
3778 .fno = gen_helper_gvec_srshr_b,
3779 .opt_opc = vecop_list,
3780 .vece = MO_8 },
3781 { .fni8 = gen_srshr16_i64,
3782 .fniv = gen_srshr_vec,
3783 .fno = gen_helper_gvec_srshr_h,
3784 .opt_opc = vecop_list,
3785 .vece = MO_16 },
3786 { .fni4 = gen_srshr32_i32,
3787 .fniv = gen_srshr_vec,
3788 .fno = gen_helper_gvec_srshr_s,
3789 .opt_opc = vecop_list,
3790 .vece = MO_32 },
3791 { .fni8 = gen_srshr64_i64,
3792 .fniv = gen_srshr_vec,
3793 .fno = gen_helper_gvec_srshr_d,
3794 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3795 .opt_opc = vecop_list,
3796 .vece = MO_64 },
3799 /* tszimm encoding produces immediates in the range [1..esize] */
3800 tcg_debug_assert(shift > 0);
3801 tcg_debug_assert(shift <= (8 << vece));
3803 if (shift == (8 << vece)) {
3805 * Shifts larger than the element size are architecturally valid.
3806 * Signed results in all sign bits. With rounding, this produces
3807 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3808 * I.e. always zero.
3810 tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3811 } else {
3812 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3816 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3818 TCGv_i64 t = tcg_temp_new_i64();
3820 gen_srshr8_i64(t, a, sh);
3821 tcg_gen_vec_add8_i64(d, d, t);
3822 tcg_temp_free_i64(t);
3825 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3827 TCGv_i64 t = tcg_temp_new_i64();
3829 gen_srshr16_i64(t, a, sh);
3830 tcg_gen_vec_add16_i64(d, d, t);
3831 tcg_temp_free_i64(t);
3834 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3836 TCGv_i32 t = tcg_temp_new_i32();
3838 gen_srshr32_i32(t, a, sh);
3839 tcg_gen_add_i32(d, d, t);
3840 tcg_temp_free_i32(t);
3843 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3845 TCGv_i64 t = tcg_temp_new_i64();
3847 gen_srshr64_i64(t, a, sh);
3848 tcg_gen_add_i64(d, d, t);
3849 tcg_temp_free_i64(t);
3852 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3854 TCGv_vec t = tcg_temp_new_vec_matching(d);
3856 gen_srshr_vec(vece, t, a, sh);
3857 tcg_gen_add_vec(vece, d, d, t);
3858 tcg_temp_free_vec(t);
3861 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3862 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3864 static const TCGOpcode vecop_list[] = {
3865 INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3867 static const GVecGen2i ops[4] = {
3868 { .fni8 = gen_srsra8_i64,
3869 .fniv = gen_srsra_vec,
3870 .fno = gen_helper_gvec_srsra_b,
3871 .opt_opc = vecop_list,
3872 .load_dest = true,
3873 .vece = MO_8 },
3874 { .fni8 = gen_srsra16_i64,
3875 .fniv = gen_srsra_vec,
3876 .fno = gen_helper_gvec_srsra_h,
3877 .opt_opc = vecop_list,
3878 .load_dest = true,
3879 .vece = MO_16 },
3880 { .fni4 = gen_srsra32_i32,
3881 .fniv = gen_srsra_vec,
3882 .fno = gen_helper_gvec_srsra_s,
3883 .opt_opc = vecop_list,
3884 .load_dest = true,
3885 .vece = MO_32 },
3886 { .fni8 = gen_srsra64_i64,
3887 .fniv = gen_srsra_vec,
3888 .fno = gen_helper_gvec_srsra_d,
3889 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3890 .opt_opc = vecop_list,
3891 .load_dest = true,
3892 .vece = MO_64 },
3895 /* tszimm encoding produces immediates in the range [1..esize] */
3896 tcg_debug_assert(shift > 0);
3897 tcg_debug_assert(shift <= (8 << vece));
3900 * Shifts larger than the element size are architecturally valid.
3901 * Signed results in all sign bits. With rounding, this produces
3902 * (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3903 * I.e. always zero. With accumulation, this leaves D unchanged.
3905 if (shift == (8 << vece)) {
3906 /* Nop, but we do need to clear the tail. */
3907 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3908 } else {
3909 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3913 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3915 TCGv_i64 t = tcg_temp_new_i64();
3917 tcg_gen_shri_i64(t, a, sh - 1);
3918 tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3919 tcg_gen_vec_shr8i_i64(d, a, sh);
3920 tcg_gen_vec_add8_i64(d, d, t);
3921 tcg_temp_free_i64(t);
3924 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3926 TCGv_i64 t = tcg_temp_new_i64();
3928 tcg_gen_shri_i64(t, a, sh - 1);
3929 tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3930 tcg_gen_vec_shr16i_i64(d, a, sh);
3931 tcg_gen_vec_add16_i64(d, d, t);
3932 tcg_temp_free_i64(t);
3935 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3937 TCGv_i32 t = tcg_temp_new_i32();
3939 tcg_gen_extract_i32(t, a, sh - 1, 1);
3940 tcg_gen_shri_i32(d, a, sh);
3941 tcg_gen_add_i32(d, d, t);
3942 tcg_temp_free_i32(t);
3945 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3947 TCGv_i64 t = tcg_temp_new_i64();
3949 tcg_gen_extract_i64(t, a, sh - 1, 1);
3950 tcg_gen_shri_i64(d, a, sh);
3951 tcg_gen_add_i64(d, d, t);
3952 tcg_temp_free_i64(t);
3955 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3957 TCGv_vec t = tcg_temp_new_vec_matching(d);
3958 TCGv_vec ones = tcg_temp_new_vec_matching(d);
3960 tcg_gen_shri_vec(vece, t, a, shift - 1);
3961 tcg_gen_dupi_vec(vece, ones, 1);
3962 tcg_gen_and_vec(vece, t, t, ones);
3963 tcg_gen_shri_vec(vece, d, a, shift);
3964 tcg_gen_add_vec(vece, d, d, t);
3966 tcg_temp_free_vec(t);
3967 tcg_temp_free_vec(ones);
3970 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3971 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3973 static const TCGOpcode vecop_list[] = {
3974 INDEX_op_shri_vec, INDEX_op_add_vec, 0
3976 static const GVecGen2i ops[4] = {
3977 { .fni8 = gen_urshr8_i64,
3978 .fniv = gen_urshr_vec,
3979 .fno = gen_helper_gvec_urshr_b,
3980 .opt_opc = vecop_list,
3981 .vece = MO_8 },
3982 { .fni8 = gen_urshr16_i64,
3983 .fniv = gen_urshr_vec,
3984 .fno = gen_helper_gvec_urshr_h,
3985 .opt_opc = vecop_list,
3986 .vece = MO_16 },
3987 { .fni4 = gen_urshr32_i32,
3988 .fniv = gen_urshr_vec,
3989 .fno = gen_helper_gvec_urshr_s,
3990 .opt_opc = vecop_list,
3991 .vece = MO_32 },
3992 { .fni8 = gen_urshr64_i64,
3993 .fniv = gen_urshr_vec,
3994 .fno = gen_helper_gvec_urshr_d,
3995 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3996 .opt_opc = vecop_list,
3997 .vece = MO_64 },
4000 /* tszimm encoding produces immediates in the range [1..esize] */
4001 tcg_debug_assert(shift > 0);
4002 tcg_debug_assert(shift <= (8 << vece));
4004 if (shift == (8 << vece)) {
4006 * Shifts larger than the element size are architecturally valid.
4007 * Unsigned results in zero. With rounding, this produces a
4008 * copy of the most significant bit.
4010 tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4011 } else {
4012 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4016 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4018 TCGv_i64 t = tcg_temp_new_i64();
4020 if (sh == 8) {
4021 tcg_gen_vec_shr8i_i64(t, a, 7);
4022 } else {
4023 gen_urshr8_i64(t, a, sh);
4025 tcg_gen_vec_add8_i64(d, d, t);
4026 tcg_temp_free_i64(t);
4029 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4031 TCGv_i64 t = tcg_temp_new_i64();
4033 if (sh == 16) {
4034 tcg_gen_vec_shr16i_i64(t, a, 15);
4035 } else {
4036 gen_urshr16_i64(t, a, sh);
4038 tcg_gen_vec_add16_i64(d, d, t);
4039 tcg_temp_free_i64(t);
4042 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4044 TCGv_i32 t = tcg_temp_new_i32();
4046 if (sh == 32) {
4047 tcg_gen_shri_i32(t, a, 31);
4048 } else {
4049 gen_urshr32_i32(t, a, sh);
4051 tcg_gen_add_i32(d, d, t);
4052 tcg_temp_free_i32(t);
4055 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4057 TCGv_i64 t = tcg_temp_new_i64();
4059 if (sh == 64) {
4060 tcg_gen_shri_i64(t, a, 63);
4061 } else {
4062 gen_urshr64_i64(t, a, sh);
4064 tcg_gen_add_i64(d, d, t);
4065 tcg_temp_free_i64(t);
4068 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4070 TCGv_vec t = tcg_temp_new_vec_matching(d);
4072 if (sh == (8 << vece)) {
4073 tcg_gen_shri_vec(vece, t, a, sh - 1);
4074 } else {
4075 gen_urshr_vec(vece, t, a, sh);
4077 tcg_gen_add_vec(vece, d, d, t);
4078 tcg_temp_free_vec(t);
4081 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4082 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4084 static const TCGOpcode vecop_list[] = {
4085 INDEX_op_shri_vec, INDEX_op_add_vec, 0
4087 static const GVecGen2i ops[4] = {
4088 { .fni8 = gen_ursra8_i64,
4089 .fniv = gen_ursra_vec,
4090 .fno = gen_helper_gvec_ursra_b,
4091 .opt_opc = vecop_list,
4092 .load_dest = true,
4093 .vece = MO_8 },
4094 { .fni8 = gen_ursra16_i64,
4095 .fniv = gen_ursra_vec,
4096 .fno = gen_helper_gvec_ursra_h,
4097 .opt_opc = vecop_list,
4098 .load_dest = true,
4099 .vece = MO_16 },
4100 { .fni4 = gen_ursra32_i32,
4101 .fniv = gen_ursra_vec,
4102 .fno = gen_helper_gvec_ursra_s,
4103 .opt_opc = vecop_list,
4104 .load_dest = true,
4105 .vece = MO_32 },
4106 { .fni8 = gen_ursra64_i64,
4107 .fniv = gen_ursra_vec,
4108 .fno = gen_helper_gvec_ursra_d,
4109 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4110 .opt_opc = vecop_list,
4111 .load_dest = true,
4112 .vece = MO_64 },
4115 /* tszimm encoding produces immediates in the range [1..esize] */
4116 tcg_debug_assert(shift > 0);
4117 tcg_debug_assert(shift <= (8 << vece));
4119 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
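/*
 * Shift right and insert: the low (esize - shift) bits of each
 * destination lane are replaced with a >> shift, while the top 'shift'
 * bits of the destination are preserved.  The 8- and 16-bit lanes do
 * this with a replicated mask (dup_const) on a 64-bit temporary; the
 * 32- and 64-bit lanes can simply use deposit.
 */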
4122 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4124 uint64_t mask = dup_const(MO_8, 0xff >> shift);
4125 TCGv_i64 t = tcg_temp_new_i64();
4127 tcg_gen_shri_i64(t, a, shift);
4128 tcg_gen_andi_i64(t, t, mask);
4129 tcg_gen_andi_i64(d, d, ~mask);
4130 tcg_gen_or_i64(d, d, t);
4131 tcg_temp_free_i64(t);
4134 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4136 uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4137 TCGv_i64 t = tcg_temp_new_i64();
4139 tcg_gen_shri_i64(t, a, shift);
4140 tcg_gen_andi_i64(t, t, mask);
4141 tcg_gen_andi_i64(d, d, ~mask);
4142 tcg_gen_or_i64(d, d, t);
4143 tcg_temp_free_i64(t);
4146 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4148 tcg_gen_shri_i32(a, a, shift);
4149 tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4152 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4154 tcg_gen_shri_i64(a, a, shift);
4155 tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4158 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4160 TCGv_vec t = tcg_temp_new_vec_matching(d);
4161 TCGv_vec m = tcg_temp_new_vec_matching(d);
4163 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4164 tcg_gen_shri_vec(vece, t, a, sh);
4165 tcg_gen_and_vec(vece, d, d, m);
4166 tcg_gen_or_vec(vece, d, d, t);
4168 tcg_temp_free_vec(t);
4169 tcg_temp_free_vec(m);
4172 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4173 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4175 static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4176 const GVecGen2i ops[4] = {
4177 { .fni8 = gen_shr8_ins_i64,
4178 .fniv = gen_shr_ins_vec,
4179 .fno = gen_helper_gvec_sri_b,
4180 .load_dest = true,
4181 .opt_opc = vecop_list,
4182 .vece = MO_8 },
4183 { .fni8 = gen_shr16_ins_i64,
4184 .fniv = gen_shr_ins_vec,
4185 .fno = gen_helper_gvec_sri_h,
4186 .load_dest = true,
4187 .opt_opc = vecop_list,
4188 .vece = MO_16 },
4189 { .fni4 = gen_shr32_ins_i32,
4190 .fniv = gen_shr_ins_vec,
4191 .fno = gen_helper_gvec_sri_s,
4192 .load_dest = true,
4193 .opt_opc = vecop_list,
4194 .vece = MO_32 },
4195 { .fni8 = gen_shr64_ins_i64,
4196 .fniv = gen_shr_ins_vec,
4197 .fno = gen_helper_gvec_sri_d,
4198 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4199 .load_dest = true,
4200 .opt_opc = vecop_list,
4201 .vece = MO_64 },
4204 /* tszimm encoding produces immediates in the range [1..esize]. */
4205 tcg_debug_assert(shift > 0);
4206 tcg_debug_assert(shift <= (8 << vece));
4208 /* Shift of esize leaves destination unchanged. */
4209 if (shift < (8 << vece)) {
4210 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4211 } else {
4212 /* Nop, but we do need to clear the tail. */
4213 tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
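/*
 * Shift left and insert is the mirror image: a << shift replaces the
 * top (esize - shift) bits of each destination lane and the low
 * 'shift' bits of the destination are preserved, again via replicated
 * masks for the narrow lanes and deposit for the 32- and 64-bit ones.
 */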
4217 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4219 uint64_t mask = dup_const(MO_8, 0xff << shift);
4220 TCGv_i64 t = tcg_temp_new_i64();
4222 tcg_gen_shli_i64(t, a, shift);
4223 tcg_gen_andi_i64(t, t, mask);
4224 tcg_gen_andi_i64(d, d, ~mask);
4225 tcg_gen_or_i64(d, d, t);
4226 tcg_temp_free_i64(t);
4229 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4231 uint64_t mask = dup_const(MO_16, 0xffff << shift);
4232 TCGv_i64 t = tcg_temp_new_i64();
4234 tcg_gen_shli_i64(t, a, shift);
4235 tcg_gen_andi_i64(t, t, mask);
4236 tcg_gen_andi_i64(d, d, ~mask);
4237 tcg_gen_or_i64(d, d, t);
4238 tcg_temp_free_i64(t);
4241 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4243 tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4246 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4248 tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4251 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4253 TCGv_vec t = tcg_temp_new_vec_matching(d);
4254 TCGv_vec m = tcg_temp_new_vec_matching(d);
4256 tcg_gen_shli_vec(vece, t, a, sh);
4257 tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4258 tcg_gen_and_vec(vece, d, d, m);
4259 tcg_gen_or_vec(vece, d, d, t);
4261 tcg_temp_free_vec(t);
4262 tcg_temp_free_vec(m);
4265 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4266 int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4268 static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4269 const GVecGen2i ops[4] = {
4270 { .fni8 = gen_shl8_ins_i64,
4271 .fniv = gen_shl_ins_vec,
4272 .fno = gen_helper_gvec_sli_b,
4273 .load_dest = true,
4274 .opt_opc = vecop_list,
4275 .vece = MO_8 },
4276 { .fni8 = gen_shl16_ins_i64,
4277 .fniv = gen_shl_ins_vec,
4278 .fno = gen_helper_gvec_sli_h,
4279 .load_dest = true,
4280 .opt_opc = vecop_list,
4281 .vece = MO_16 },
4282 { .fni4 = gen_shl32_ins_i32,
4283 .fniv = gen_shl_ins_vec,
4284 .fno = gen_helper_gvec_sli_s,
4285 .load_dest = true,
4286 .opt_opc = vecop_list,
4287 .vece = MO_32 },
4288 { .fni8 = gen_shl64_ins_i64,
4289 .fniv = gen_shl_ins_vec,
4290 .fno = gen_helper_gvec_sli_d,
4291 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4292 .load_dest = true,
4293 .opt_opc = vecop_list,
4294 .vece = MO_64 },
4297 /* tszimm encoding produces immediates in the range [0..esize-1]. */
4298 tcg_debug_assert(shift >= 0);
4299 tcg_debug_assert(shift < (8 << vece));
4301 if (shift == 0) {
4302 tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4303 } else {
4304 tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
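/*
 * Multiply-accumulate and multiply-subtract: d = d +/- (a * b).
 * load_dest is set in the tables below so that 'd' arrives holding
 * the previous destination contents; the product is formed in place
 * in 'a' to avoid an extra temporary.
 */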
4308 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4310 gen_helper_neon_mul_u8(a, a, b);
4311 gen_helper_neon_add_u8(d, d, a);
4314 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4316 gen_helper_neon_mul_u8(a, a, b);
4317 gen_helper_neon_sub_u8(d, d, a);
4320 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4322 gen_helper_neon_mul_u16(a, a, b);
4323 gen_helper_neon_add_u16(d, d, a);
4326 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4328 gen_helper_neon_mul_u16(a, a, b);
4329 gen_helper_neon_sub_u16(d, d, a);
4332 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4334 tcg_gen_mul_i32(a, a, b);
4335 tcg_gen_add_i32(d, d, a);
4338 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4340 tcg_gen_mul_i32(a, a, b);
4341 tcg_gen_sub_i32(d, d, a);
4344 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4346 tcg_gen_mul_i64(a, a, b);
4347 tcg_gen_add_i64(d, d, a);
4350 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4352 tcg_gen_mul_i64(a, a, b);
4353 tcg_gen_sub_i64(d, d, a);
4356 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4358 tcg_gen_mul_vec(vece, a, a, b);
4359 tcg_gen_add_vec(vece, d, d, a);
4362 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4364 tcg_gen_mul_vec(vece, a, a, b);
4365 tcg_gen_sub_vec(vece, d, d, a);
4368 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4369 * these tables are shared with AArch64, which does support them.
4371 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4372 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4374 static const TCGOpcode vecop_list[] = {
4375 INDEX_op_mul_vec, INDEX_op_add_vec, 0
4377 static const GVecGen3 ops[4] = {
4378 { .fni4 = gen_mla8_i32,
4379 .fniv = gen_mla_vec,
4380 .load_dest = true,
4381 .opt_opc = vecop_list,
4382 .vece = MO_8 },
4383 { .fni4 = gen_mla16_i32,
4384 .fniv = gen_mla_vec,
4385 .load_dest = true,
4386 .opt_opc = vecop_list,
4387 .vece = MO_16 },
4388 { .fni4 = gen_mla32_i32,
4389 .fniv = gen_mla_vec,
4390 .load_dest = true,
4391 .opt_opc = vecop_list,
4392 .vece = MO_32 },
4393 { .fni8 = gen_mla64_i64,
4394 .fniv = gen_mla_vec,
4395 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4396 .load_dest = true,
4397 .opt_opc = vecop_list,
4398 .vece = MO_64 },
4400 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4403 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4404 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4406 static const TCGOpcode vecop_list[] = {
4407 INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4409 static const GVecGen3 ops[4] = {
4410 { .fni4 = gen_mls8_i32,
4411 .fniv = gen_mls_vec,
4412 .load_dest = true,
4413 .opt_opc = vecop_list,
4414 .vece = MO_8 },
4415 { .fni4 = gen_mls16_i32,
4416 .fniv = gen_mls_vec,
4417 .load_dest = true,
4418 .opt_opc = vecop_list,
4419 .vece = MO_16 },
4420 { .fni4 = gen_mls32_i32,
4421 .fniv = gen_mls_vec,
4422 .load_dest = true,
4423 .opt_opc = vecop_list,
4424 .vece = MO_32 },
4425 { .fni8 = gen_mls64_i64,
4426 .fniv = gen_mls_vec,
4427 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4428 .load_dest = true,
4429 .opt_opc = vecop_list,
4430 .vece = MO_64 },
4432 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4435 /* CMTST : test is "if ((X & Y) != 0)". */
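/*
 * In outline: the scalar expansions below produce the required
 * all-ones / all-zeros lane with setcond (0 or 1) followed by neg
 * (0 or -1), while the vector expansion can use cmp_vec against zero
 * directly, since that already yields a full-width mask per lane.
 */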
4436 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4438 tcg_gen_and_i32(d, a, b);
4439 tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4440 tcg_gen_neg_i32(d, d);
4443 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4445 tcg_gen_and_i64(d, a, b);
4446 tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4447 tcg_gen_neg_i64(d, d);
4450 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4452 tcg_gen_and_vec(vece, d, a, b);
4453 tcg_gen_dupi_vec(vece, a, 0);
4454 tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4457 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4458 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4460 static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4461 static const GVecGen3 ops[4] = {
4462 { .fni4 = gen_helper_neon_tst_u8,
4463 .fniv = gen_cmtst_vec,
4464 .opt_opc = vecop_list,
4465 .vece = MO_8 },
4466 { .fni4 = gen_helper_neon_tst_u16,
4467 .fniv = gen_cmtst_vec,
4468 .opt_opc = vecop_list,
4469 .vece = MO_16 },
4470 { .fni4 = gen_cmtst_i32,
4471 .fniv = gen_cmtst_vec,
4472 .opt_opc = vecop_list,
4473 .vece = MO_32 },
4474 { .fni8 = gen_cmtst_i64,
4475 .fniv = gen_cmtst_vec,
4476 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4477 .opt_opc = vecop_list,
4478 .vece = MO_64 },
4480 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
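/*
 * USHL: shift by a per-lane register amount.  Only the low byte of
 * the shift operand is used, as a signed value: positive shifts left,
 * negative shifts right, and any magnitude >= esize gives zero.  The
 * expansions compute both candidate results and pick the in-range one
 * with movcond (scalar) or compare-and-mask (vector), relying on
 * TCG's "unspecified but no trap" behaviour for out-of-range counts,
 * as the comments below note.  E.g. src = 0x80, shift = 0xff (-1):
 * the left shift is out of range and discarded, and the right shift
 * by 1 gives 0x40.
 */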
4483 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4485 TCGv_i32 lval = tcg_temp_new_i32();
4486 TCGv_i32 rval = tcg_temp_new_i32();
4487 TCGv_i32 lsh = tcg_temp_new_i32();
4488 TCGv_i32 rsh = tcg_temp_new_i32();
4489 TCGv_i32 zero = tcg_const_i32(0);
4490 TCGv_i32 max = tcg_const_i32(32);
4493 * Rely on the TCG guarantee that out of range shifts produce
4494 * unspecified results, not undefined behaviour (i.e. no trap).
4495 * Discard out-of-range results after the fact.
4497 tcg_gen_ext8s_i32(lsh, shift);
4498 tcg_gen_neg_i32(rsh, lsh);
4499 tcg_gen_shl_i32(lval, src, lsh);
4500 tcg_gen_shr_i32(rval, src, rsh);
4501 tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4502 tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4504 tcg_temp_free_i32(lval);
4505 tcg_temp_free_i32(rval);
4506 tcg_temp_free_i32(lsh);
4507 tcg_temp_free_i32(rsh);
4508 tcg_temp_free_i32(zero);
4509 tcg_temp_free_i32(max);
4512 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4514 TCGv_i64 lval = tcg_temp_new_i64();
4515 TCGv_i64 rval = tcg_temp_new_i64();
4516 TCGv_i64 lsh = tcg_temp_new_i64();
4517 TCGv_i64 rsh = tcg_temp_new_i64();
4518 TCGv_i64 zero = tcg_const_i64(0);
4519 TCGv_i64 max = tcg_const_i64(64);
4522 * Rely on the TCG guarantee that out of range shifts produce
4523 * unspecified results, not undefined behaviour (i.e. no trap).
4524 * Discard out-of-range results after the fact.
4526 tcg_gen_ext8s_i64(lsh, shift);
4527 tcg_gen_neg_i64(rsh, lsh);
4528 tcg_gen_shl_i64(lval, src, lsh);
4529 tcg_gen_shr_i64(rval, src, rsh);
4530 tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4531 tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4533 tcg_temp_free_i64(lval);
4534 tcg_temp_free_i64(rval);
4535 tcg_temp_free_i64(lsh);
4536 tcg_temp_free_i64(rsh);
4537 tcg_temp_free_i64(zero);
4538 tcg_temp_free_i64(max);
4541 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4542 TCGv_vec src, TCGv_vec shift)
4544 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4545 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4546 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4547 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4548 TCGv_vec msk, max;
4550 tcg_gen_neg_vec(vece, rsh, shift);
4551 if (vece == MO_8) {
4552 tcg_gen_mov_vec(lsh, shift);
4553 } else {
4554 msk = tcg_temp_new_vec_matching(dst);
4555 tcg_gen_dupi_vec(vece, msk, 0xff);
4556 tcg_gen_and_vec(vece, lsh, shift, msk);
4557 tcg_gen_and_vec(vece, rsh, rsh, msk);
4558 tcg_temp_free_vec(msk);
4562 * Rely on the TCG guarantee that out of range shifts produce
4563 * unspecified results, not undefined behaviour (i.e. no trap).
4564 * Discard out-of-range results after the fact.
4566 tcg_gen_shlv_vec(vece, lval, src, lsh);
4567 tcg_gen_shrv_vec(vece, rval, src, rsh);
4569 max = tcg_temp_new_vec_matching(dst);
4570 tcg_gen_dupi_vec(vece, max, 8 << vece);
4573 * The choice of LT (signed) and GEU (unsigned) are biased toward
4574 * the instructions of the x86_64 host. For MO_8, the whole byte
4575 * is significant so we must use an unsigned compare; otherwise we
4576 * have already masked to a byte and so a signed compare works.
4577 * Other tcg hosts have a full set of comparisons and do not care.
4579 if (vece == MO_8) {
4580 tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4581 tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4582 tcg_gen_andc_vec(vece, lval, lval, lsh);
4583 tcg_gen_andc_vec(vece, rval, rval, rsh);
4584 } else {
4585 tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4586 tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4587 tcg_gen_and_vec(vece, lval, lval, lsh);
4588 tcg_gen_and_vec(vece, rval, rval, rsh);
4590 tcg_gen_or_vec(vece, dst, lval, rval);
4592 tcg_temp_free_vec(max);
4593 tcg_temp_free_vec(lval);
4594 tcg_temp_free_vec(rval);
4595 tcg_temp_free_vec(lsh);
4596 tcg_temp_free_vec(rsh);
4599 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4600 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4602 static const TCGOpcode vecop_list[] = {
4603 INDEX_op_neg_vec, INDEX_op_shlv_vec,
4604 INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4606 static const GVecGen3 ops[4] = {
4607 { .fniv = gen_ushl_vec,
4608 .fno = gen_helper_gvec_ushl_b,
4609 .opt_opc = vecop_list,
4610 .vece = MO_8 },
4611 { .fniv = gen_ushl_vec,
4612 .fno = gen_helper_gvec_ushl_h,
4613 .opt_opc = vecop_list,
4614 .vece = MO_16 },
4615 { .fni4 = gen_ushl_i32,
4616 .fniv = gen_ushl_vec,
4617 .opt_opc = vecop_list,
4618 .vece = MO_32 },
4619 { .fni8 = gen_ushl_i64,
4620 .fniv = gen_ushl_vec,
4621 .opt_opc = vecop_list,
4622 .vece = MO_64 },
4624 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
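/*
 * SSHL is the signed counterpart: an out-of-range left shift still
 * gives zero, but an out-of-range right shift must return the sign,
 * so the right-shift count is clamped to esize - 1 with umin before
 * the arithmetic shift, and the final select is made on the sign of
 * the (low byte of the) shift value.
 */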
4627 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4629 TCGv_i32 lval = tcg_temp_new_i32();
4630 TCGv_i32 rval = tcg_temp_new_i32();
4631 TCGv_i32 lsh = tcg_temp_new_i32();
4632 TCGv_i32 rsh = tcg_temp_new_i32();
4633 TCGv_i32 zero = tcg_const_i32(0);
4634 TCGv_i32 max = tcg_const_i32(31);
4637 * Rely on the TCG guarantee that out of range shifts produce
4638 * unspecified results, not undefined behaviour (i.e. no trap).
4639 * Discard out-of-range results after the fact.
4641 tcg_gen_ext8s_i32(lsh, shift);
4642 tcg_gen_neg_i32(rsh, lsh);
4643 tcg_gen_shl_i32(lval, src, lsh);
4644 tcg_gen_umin_i32(rsh, rsh, max);
4645 tcg_gen_sar_i32(rval, src, rsh);
4646 tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4647 tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4649 tcg_temp_free_i32(lval);
4650 tcg_temp_free_i32(rval);
4651 tcg_temp_free_i32(lsh);
4652 tcg_temp_free_i32(rsh);
4653 tcg_temp_free_i32(zero);
4654 tcg_temp_free_i32(max);
4657 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4659 TCGv_i64 lval = tcg_temp_new_i64();
4660 TCGv_i64 rval = tcg_temp_new_i64();
4661 TCGv_i64 lsh = tcg_temp_new_i64();
4662 TCGv_i64 rsh = tcg_temp_new_i64();
4663 TCGv_i64 zero = tcg_const_i64(0);
4664 TCGv_i64 max = tcg_const_i64(63);
4667 * Rely on the TCG guarantee that out of range shifts produce
4668 * unspecified results, not undefined behaviour (i.e. no trap).
4669 * Discard out-of-range results after the fact.
4671 tcg_gen_ext8s_i64(lsh, shift);
4672 tcg_gen_neg_i64(rsh, lsh);
4673 tcg_gen_shl_i64(lval, src, lsh);
4674 tcg_gen_umin_i64(rsh, rsh, max);
4675 tcg_gen_sar_i64(rval, src, rsh);
4676 tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4677 tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4679 tcg_temp_free_i64(lval);
4680 tcg_temp_free_i64(rval);
4681 tcg_temp_free_i64(lsh);
4682 tcg_temp_free_i64(rsh);
4683 tcg_temp_free_i64(zero);
4684 tcg_temp_free_i64(max);
4687 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4688 TCGv_vec src, TCGv_vec shift)
4690 TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4691 TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4692 TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4693 TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4694 TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4697 * Rely on the TCG guarantee that out of range shifts produce
4698 * unspecified results, not undefined behaviour (i.e. no trap).
4699 * Discard out-of-range results after the fact.
4701 tcg_gen_neg_vec(vece, rsh, shift);
4702 if (vece == MO_8) {
4703 tcg_gen_mov_vec(lsh, shift);
4704 } else {
4705 tcg_gen_dupi_vec(vece, tmp, 0xff);
4706 tcg_gen_and_vec(vece, lsh, shift, tmp);
4707 tcg_gen_and_vec(vece, rsh, rsh, tmp);
4710 /* Bound rsh so an out-of-range right shift becomes esize - 1, i.e. pure sign replication (-1 or 0). */
4711 tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4712 tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4713 tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4715 tcg_gen_shlv_vec(vece, lval, src, lsh);
4716 tcg_gen_sarv_vec(vece, rval, src, rsh);
4718 /* Select in-bound left shift. */
4719 tcg_gen_andc_vec(vece, lval, lval, tmp);
4721 /* Select between left and right shift. */
4722 if (vece == MO_8) {
4723 tcg_gen_dupi_vec(vece, tmp, 0);
4724 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4725 } else {
4726 tcg_gen_dupi_vec(vece, tmp, 0x80);
4727 tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4730 tcg_temp_free_vec(lval);
4731 tcg_temp_free_vec(rval);
4732 tcg_temp_free_vec(lsh);
4733 tcg_temp_free_vec(rsh);
4734 tcg_temp_free_vec(tmp);
4737 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4738 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4740 static const TCGOpcode vecop_list[] = {
4741 INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4742 INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4744 static const GVecGen3 ops[4] = {
4745 { .fniv = gen_sshl_vec,
4746 .fno = gen_helper_gvec_sshl_b,
4747 .opt_opc = vecop_list,
4748 .vece = MO_8 },
4749 { .fniv = gen_sshl_vec,
4750 .fno = gen_helper_gvec_sshl_h,
4751 .opt_opc = vecop_list,
4752 .vece = MO_16 },
4753 { .fni4 = gen_sshl_i32,
4754 .fniv = gen_sshl_vec,
4755 .opt_opc = vecop_list,
4756 .vece = MO_32 },
4757 { .fni8 = gen_sshl_i64,
4758 .fniv = gen_sshl_vec,
4759 .opt_opc = vecop_list,
4760 .vece = MO_64 },
4762 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
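/*
 * Saturating add/sub with the cumulative QC flag.  Each expander
 * computes the wrapping result alongside the saturating one and ORs
 * any per-lane difference into the 'sat' operand, which the callers
 * point at vfp.qc; QC therefore ends up nonzero iff some lane
 * saturated.
 */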
4765 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4766 TCGv_vec a, TCGv_vec b)
4768 TCGv_vec x = tcg_temp_new_vec_matching(t);
4769 tcg_gen_add_vec(vece, x, a, b);
4770 tcg_gen_usadd_vec(vece, t, a, b);
4771 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4772 tcg_gen_or_vec(vece, sat, sat, x);
4773 tcg_temp_free_vec(x);
4776 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4777 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4779 static const TCGOpcode vecop_list[] = {
4780 INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4782 static const GVecGen4 ops[4] = {
4783 { .fniv = gen_uqadd_vec,
4784 .fno = gen_helper_gvec_uqadd_b,
4785 .write_aofs = true,
4786 .opt_opc = vecop_list,
4787 .vece = MO_8 },
4788 { .fniv = gen_uqadd_vec,
4789 .fno = gen_helper_gvec_uqadd_h,
4790 .write_aofs = true,
4791 .opt_opc = vecop_list,
4792 .vece = MO_16 },
4793 { .fniv = gen_uqadd_vec,
4794 .fno = gen_helper_gvec_uqadd_s,
4795 .write_aofs = true,
4796 .opt_opc = vecop_list,
4797 .vece = MO_32 },
4798 { .fniv = gen_uqadd_vec,
4799 .fno = gen_helper_gvec_uqadd_d,
4800 .write_aofs = true,
4801 .opt_opc = vecop_list,
4802 .vece = MO_64 },
4804 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4805 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4808 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4809 TCGv_vec a, TCGv_vec b)
4811 TCGv_vec x = tcg_temp_new_vec_matching(t);
4812 tcg_gen_add_vec(vece, x, a, b);
4813 tcg_gen_ssadd_vec(vece, t, a, b);
4814 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4815 tcg_gen_or_vec(vece, sat, sat, x);
4816 tcg_temp_free_vec(x);
4819 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4820 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4822 static const TCGOpcode vecop_list[] = {
4823 INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4825 static const GVecGen4 ops[4] = {
4826 { .fniv = gen_sqadd_vec,
4827 .fno = gen_helper_gvec_sqadd_b,
4828 .opt_opc = vecop_list,
4829 .write_aofs = true,
4830 .vece = MO_8 },
4831 { .fniv = gen_sqadd_vec,
4832 .fno = gen_helper_gvec_sqadd_h,
4833 .opt_opc = vecop_list,
4834 .write_aofs = true,
4835 .vece = MO_16 },
4836 { .fniv = gen_sqadd_vec,
4837 .fno = gen_helper_gvec_sqadd_s,
4838 .opt_opc = vecop_list,
4839 .write_aofs = true,
4840 .vece = MO_32 },
4841 { .fniv = gen_sqadd_vec,
4842 .fno = gen_helper_gvec_sqadd_d,
4843 .opt_opc = vecop_list,
4844 .write_aofs = true,
4845 .vece = MO_64 },
4847 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4848 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4851 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4852 TCGv_vec a, TCGv_vec b)
4854 TCGv_vec x = tcg_temp_new_vec_matching(t);
4855 tcg_gen_sub_vec(vece, x, a, b);
4856 tcg_gen_ussub_vec(vece, t, a, b);
4857 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4858 tcg_gen_or_vec(vece, sat, sat, x);
4859 tcg_temp_free_vec(x);
4862 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4863 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4865 static const TCGOpcode vecop_list[] = {
4866 INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4868 static const GVecGen4 ops[4] = {
4869 { .fniv = gen_uqsub_vec,
4870 .fno = gen_helper_gvec_uqsub_b,
4871 .opt_opc = vecop_list,
4872 .write_aofs = true,
4873 .vece = MO_8 },
4874 { .fniv = gen_uqsub_vec,
4875 .fno = gen_helper_gvec_uqsub_h,
4876 .opt_opc = vecop_list,
4877 .write_aofs = true,
4878 .vece = MO_16 },
4879 { .fniv = gen_uqsub_vec,
4880 .fno = gen_helper_gvec_uqsub_s,
4881 .opt_opc = vecop_list,
4882 .write_aofs = true,
4883 .vece = MO_32 },
4884 { .fniv = gen_uqsub_vec,
4885 .fno = gen_helper_gvec_uqsub_d,
4886 .opt_opc = vecop_list,
4887 .write_aofs = true,
4888 .vece = MO_64 },
4890 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4891 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4894 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4895 TCGv_vec a, TCGv_vec b)
4897 TCGv_vec x = tcg_temp_new_vec_matching(t);
4898 tcg_gen_sub_vec(vece, x, a, b);
4899 tcg_gen_sssub_vec(vece, t, a, b);
4900 tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4901 tcg_gen_or_vec(vece, sat, sat, x);
4902 tcg_temp_free_vec(x);
4905 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4906 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4908 static const TCGOpcode vecop_list[] = {
4909 INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4911 static const GVecGen4 ops[4] = {
4912 { .fniv = gen_sqsub_vec,
4913 .fno = gen_helper_gvec_sqsub_b,
4914 .opt_opc = vecop_list,
4915 .write_aofs = true,
4916 .vece = MO_8 },
4917 { .fniv = gen_sqsub_vec,
4918 .fno = gen_helper_gvec_sqsub_h,
4919 .opt_opc = vecop_list,
4920 .write_aofs = true,
4921 .vece = MO_16 },
4922 { .fniv = gen_sqsub_vec,
4923 .fno = gen_helper_gvec_sqsub_s,
4924 .opt_opc = vecop_list,
4925 .write_aofs = true,
4926 .vece = MO_32 },
4927 { .fniv = gen_sqsub_vec,
4928 .fno = gen_helper_gvec_sqsub_d,
4929 .opt_opc = vecop_list,
4930 .write_aofs = true,
4931 .vece = MO_64 },
4933 tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4934 rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
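/*
 * Absolute difference: the vector form is simply max(a, b) - min(a, b);
 * the scalar forms compute both a - b and b - a and use movcond on the
 * signed (SABD) or unsigned (UABD) comparison to keep the positive one.
 */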
4937 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4939 TCGv_i32 t = tcg_temp_new_i32();
4941 tcg_gen_sub_i32(t, a, b);
4942 tcg_gen_sub_i32(d, b, a);
4943 tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4944 tcg_temp_free_i32(t);
4947 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4949 TCGv_i64 t = tcg_temp_new_i64();
4951 tcg_gen_sub_i64(t, a, b);
4952 tcg_gen_sub_i64(d, b, a);
4953 tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4954 tcg_temp_free_i64(t);
4957 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4959 TCGv_vec t = tcg_temp_new_vec_matching(d);
4961 tcg_gen_smin_vec(vece, t, a, b);
4962 tcg_gen_smax_vec(vece, d, a, b);
4963 tcg_gen_sub_vec(vece, d, d, t);
4964 tcg_temp_free_vec(t);
4967 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4968 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4970 static const TCGOpcode vecop_list[] = {
4971 INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4973 static const GVecGen3 ops[4] = {
4974 { .fniv = gen_sabd_vec,
4975 .fno = gen_helper_gvec_sabd_b,
4976 .opt_opc = vecop_list,
4977 .vece = MO_8 },
4978 { .fniv = gen_sabd_vec,
4979 .fno = gen_helper_gvec_sabd_h,
4980 .opt_opc = vecop_list,
4981 .vece = MO_16 },
4982 { .fni4 = gen_sabd_i32,
4983 .fniv = gen_sabd_vec,
4984 .fno = gen_helper_gvec_sabd_s,
4985 .opt_opc = vecop_list,
4986 .vece = MO_32 },
4987 { .fni8 = gen_sabd_i64,
4988 .fniv = gen_sabd_vec,
4989 .fno = gen_helper_gvec_sabd_d,
4990 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4991 .opt_opc = vecop_list,
4992 .vece = MO_64 },
4994 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4997 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4999 TCGv_i32 t = tcg_temp_new_i32();
5001 tcg_gen_sub_i32(t, a, b);
5002 tcg_gen_sub_i32(d, b, a);
5003 tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
5004 tcg_temp_free_i32(t);
5007 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5009 TCGv_i64 t = tcg_temp_new_i64();
5011 tcg_gen_sub_i64(t, a, b);
5012 tcg_gen_sub_i64(d, b, a);
5013 tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
5014 tcg_temp_free_i64(t);
5017 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5019 TCGv_vec t = tcg_temp_new_vec_matching(d);
5021 tcg_gen_umin_vec(vece, t, a, b);
5022 tcg_gen_umax_vec(vece, d, a, b);
5023 tcg_gen_sub_vec(vece, d, d, t);
5024 tcg_temp_free_vec(t);
5027 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5028 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5030 static const TCGOpcode vecop_list[] = {
5031 INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5033 static const GVecGen3 ops[4] = {
5034 { .fniv = gen_uabd_vec,
5035 .fno = gen_helper_gvec_uabd_b,
5036 .opt_opc = vecop_list,
5037 .vece = MO_8 },
5038 { .fniv = gen_uabd_vec,
5039 .fno = gen_helper_gvec_uabd_h,
5040 .opt_opc = vecop_list,
5041 .vece = MO_16 },
5042 { .fni4 = gen_uabd_i32,
5043 .fniv = gen_uabd_vec,
5044 .fno = gen_helper_gvec_uabd_s,
5045 .opt_opc = vecop_list,
5046 .vece = MO_32 },
5047 { .fni8 = gen_uabd_i64,
5048 .fniv = gen_uabd_vec,
5049 .fno = gen_helper_gvec_uabd_d,
5050 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5051 .opt_opc = vecop_list,
5052 .vece = MO_64 },
5054 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
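/*
 * Absolute difference and accumulate: d += abd(a, b), reusing the
 * difference expansions above.
 */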
5057 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5059 TCGv_i32 t = tcg_temp_new_i32();
5060 gen_sabd_i32(t, a, b);
5061 tcg_gen_add_i32(d, d, t);
5062 tcg_temp_free_i32(t);
5065 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5067 TCGv_i64 t = tcg_temp_new_i64();
5068 gen_sabd_i64(t, a, b);
5069 tcg_gen_add_i64(d, d, t);
5070 tcg_temp_free_i64(t);
5073 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5075 TCGv_vec t = tcg_temp_new_vec_matching(d);
5076 gen_sabd_vec(vece, t, a, b);
5077 tcg_gen_add_vec(vece, d, d, t);
5078 tcg_temp_free_vec(t);
5081 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5082 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5084 static const TCGOpcode vecop_list[] = {
5085 INDEX_op_sub_vec, INDEX_op_add_vec,
5086 INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5088 static const GVecGen3 ops[4] = {
5089 { .fniv = gen_saba_vec,
5090 .fno = gen_helper_gvec_saba_b,
5091 .opt_opc = vecop_list,
5092 .load_dest = true,
5093 .vece = MO_8 },
5094 { .fniv = gen_saba_vec,
5095 .fno = gen_helper_gvec_saba_h,
5096 .opt_opc = vecop_list,
5097 .load_dest = true,
5098 .vece = MO_16 },
5099 { .fni4 = gen_saba_i32,
5100 .fniv = gen_saba_vec,
5101 .fno = gen_helper_gvec_saba_s,
5102 .opt_opc = vecop_list,
5103 .load_dest = true,
5104 .vece = MO_32 },
5105 { .fni8 = gen_saba_i64,
5106 .fniv = gen_saba_vec,
5107 .fno = gen_helper_gvec_saba_d,
5108 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5109 .opt_opc = vecop_list,
5110 .load_dest = true,
5111 .vece = MO_64 },
5113 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5116 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5118 TCGv_i32 t = tcg_temp_new_i32();
5119 gen_uabd_i32(t, a, b);
5120 tcg_gen_add_i32(d, d, t);
5121 tcg_temp_free_i32(t);
5124 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5126 TCGv_i64 t = tcg_temp_new_i64();
5127 gen_uabd_i64(t, a, b);
5128 tcg_gen_add_i64(d, d, t);
5129 tcg_temp_free_i64(t);
5132 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5134 TCGv_vec t = tcg_temp_new_vec_matching(d);
5135 gen_uabd_vec(vece, t, a, b);
5136 tcg_gen_add_vec(vece, d, d, t);
5137 tcg_temp_free_vec(t);
5140 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5141 uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5143 static const TCGOpcode vecop_list[] = {
5144 INDEX_op_sub_vec, INDEX_op_add_vec,
5145 INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5147 static const GVecGen3 ops[4] = {
5148 { .fniv = gen_uaba_vec,
5149 .fno = gen_helper_gvec_uaba_b,
5150 .opt_opc = vecop_list,
5151 .load_dest = true,
5152 .vece = MO_8 },
5153 { .fniv = gen_uaba_vec,
5154 .fno = gen_helper_gvec_uaba_h,
5155 .opt_opc = vecop_list,
5156 .load_dest = true,
5157 .vece = MO_16 },
5158 { .fni4 = gen_uaba_i32,
5159 .fniv = gen_uaba_vec,
5160 .fno = gen_helper_gvec_uaba_s,
5161 .opt_opc = vecop_list,
5162 .load_dest = true,
5163 .vece = MO_32 },
5164 { .fni8 = gen_uaba_i64,
5165 .fniv = gen_uaba_vec,
5166 .fno = gen_helper_gvec_uaba_d,
5167 .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5168 .opt_opc = vecop_list,
5169 .load_dest = true,
5170 .vece = MO_64 },
5172 tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5175 /* Translate a NEON data processing instruction. Return nonzero if the
5176 instruction is invalid.
5177 We process data in a mixture of 32-bit and 64-bit chunks.
5178 Mostly we use 32-bit chunks so we can use normal scalar instructions. */
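/*
 * Decode sketch for what is still handled in this legacy function:
 * three-registers-of-different-lengths is reduced to VQDMLAL, VQDMLSL,
 * VQDMULL and polynomial VMULL (the rest, as the table below notes,
 * now go through decodetree and so return 1 here); then
 * two-registers-and-a-scalar, VEXT, the two-register-misc group,
 * VTBL/VTBX and VDUP.
 */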
5180 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5182 int op;
5183 int q;
5184 int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5185 int size;
5186 int pass;
5187 int u;
5188 int vec_size;
5189 uint32_t imm;
5190 TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5191 TCGv_ptr ptr1;
5192 TCGv_i64 tmp64;
5194 if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5195 return 1;
5198 /* FIXME: this access check should not take precedence over UNDEF
5199 * for invalid encodings; we will generate incorrect syndrome information
5200 * for attempts to execute invalid vfp/neon encodings with FP disabled.
5202 if (s->fp_excp_el) {
5203 gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5204 syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5205 return 0;
5208 if (!s->vfp_enabled)
5209 return 1;
5210 q = (insn & (1 << 6)) != 0;
5211 u = (insn >> 24) & 1;
5212 VFP_DREG_D(rd, insn);
5213 VFP_DREG_N(rn, insn);
5214 VFP_DREG_M(rm, insn);
5215 size = (insn >> 20) & 3;
5216 vec_size = q ? 16 : 8;
5217 rd_ofs = neon_reg_offset(rd, 0);
5218 rn_ofs = neon_reg_offset(rn, 0);
5219 rm_ofs = neon_reg_offset(rm, 0);
5221 if ((insn & (1 << 23)) == 0) {
5222 /* Three register same length: handled by decodetree */
5223 return 1;
5224 } else if (insn & (1 << 4)) {
5225 /* Two registers and shift or reg and imm: handled by decodetree */
5226 return 1;
5227 } else { /* (insn & 0x00800010 == 0x00800000) */
5228 if (size != 3) {
5229 op = (insn >> 8) & 0xf;
5230 if ((insn & (1 << 6)) == 0) {
5231 /* Three registers of different lengths. */
5232 /* undefreq: bit 0 : UNDEF if size == 0
5233 * bit 1 : UNDEF if size == 1
5234 * bit 2 : UNDEF if size == 2
5235 * bit 3 : UNDEF if U == 1
5236 * Note that [2:0] set implies 'always UNDEF'
5238 int undefreq;
5239 /* prewiden, src1_wide, src2_wide, undefreq */
5240 static const int neon_3reg_wide[16][4] = {
5241 {0, 0, 0, 7}, /* VADDL: handled by decodetree */
5242 {0, 0, 0, 7}, /* VADDW: handled by decodetree */
5243 {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
5244 {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
5245 {0, 0, 0, 7}, /* VADDHN: handled by decodetree */
5246 {0, 0, 0, 7}, /* VABAL */
5247 {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
5248 {0, 0, 0, 7}, /* VABDL */
5249 {0, 0, 0, 7}, /* VMLAL */
5250 {0, 0, 0, 9}, /* VQDMLAL */
5251 {0, 0, 0, 7}, /* VMLSL */
5252 {0, 0, 0, 9}, /* VQDMLSL */
5253 {0, 0, 0, 7}, /* Integer VMULL */
5254 {0, 0, 0, 9}, /* VQDMULL */
5255 {0, 0, 0, 0xa}, /* Polynomial VMULL */
5256 {0, 0, 0, 7}, /* Reserved: always UNDEF */
5259 undefreq = neon_3reg_wide[op][3];
5261 if ((undefreq & (1 << size)) ||
5262 ((undefreq & 8) && u)) {
5263 return 1;
5265 if (rd & 1) {
5266 return 1;
5269 /* Handle polynomial VMULL in a single pass. */
5270 if (op == 14) {
5271 if (size == 0) {
5272 /* VMULL.P8 */
5273 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5274 0, gen_helper_neon_pmull_h);
5275 } else {
5276 /* VMULL.P64 */
5277 if (!dc_isar_feature(aa32_pmull, s)) {
5278 return 1;
5280 tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5281 0, gen_helper_gvec_pmull_q);
5283 return 0;
5286 /* Avoid overlapping operands. Wide source operands are
5287 always aligned so will never overlap with wide
5288 destinations in problematic ways. */
5289 if (rd == rm) {
5290 tmp = neon_load_reg(rm, 1);
5291 neon_store_scratch(2, tmp);
5292 } else if (rd == rn) {
5293 tmp = neon_load_reg(rn, 1);
5294 neon_store_scratch(2, tmp);
5296 tmp3 = NULL;
5297 for (pass = 0; pass < 2; pass++) {
5298 if (pass == 1 && rd == rn) {
5299 tmp = neon_load_scratch(2);
5300 } else {
5301 tmp = neon_load_reg(rn, pass);
5303 if (pass == 1 && rd == rm) {
5304 tmp2 = neon_load_scratch(2);
5305 } else {
5306 tmp2 = neon_load_reg(rm, pass);
5308 switch (op) {
5309 case 9: case 11: case 13:
5310 /* VQDMLAL, VQDMLSL, VQDMULL */
5311 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5312 break;
5313 default: /* 15 is RESERVED: caught earlier */
5314 abort();
5316 if (op == 13) {
5317 /* VQDMULL */
5318 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5319 neon_store_reg64(cpu_V0, rd + pass);
5320 } else {
5321 /* Accumulate. */
5322 neon_load_reg64(cpu_V1, rd + pass);
5323 switch (op) {
5324 case 9: case 11: /* VQDMLAL, VQDMLSL */
5325 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5326 if (op == 11) {
5327 gen_neon_negl(cpu_V0, size);
5329 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5330 break;
5331 default:
5332 abort();
5334 neon_store_reg64(cpu_V0, rd + pass);
5337 } else {
5338 /* Two registers and a scalar. NB that for ops of this form
5339 * the ARM ARM labels bit 24 as Q, but it is in our variable
5340 * 'u', not 'q'.
5342 if (size == 0) {
5343 return 1;
5345 switch (op) {
5346 case 1: /* Floating point VMLA scalar */
5347 case 5: /* Floating point VMLS scalar */
5348 case 9: /* Floating point VMUL scalar */
5349 if (size == 1) {
5350 return 1;
5352 /* fall through */
5353 case 0: /* Integer VMLA scalar */
5354 case 4: /* Integer VMLS scalar */
5355 case 8: /* Integer VMUL scalar */
5356 case 12: /* VQDMULH scalar */
5357 case 13: /* VQRDMULH scalar */
5358 if (u && ((rd | rn) & 1)) {
5359 return 1;
5361 tmp = neon_get_scalar(size, rm);
5362 neon_store_scratch(0, tmp);
5363 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5364 tmp = neon_load_scratch(0);
5365 tmp2 = neon_load_reg(rn, pass);
5366 if (op == 12) {
5367 if (size == 1) {
5368 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5369 } else {
5370 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5372 } else if (op == 13) {
5373 if (size == 1) {
5374 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5375 } else {
5376 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5378 } else if (op & 1) {
5379 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5380 gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5381 tcg_temp_free_ptr(fpstatus);
5382 } else {
5383 switch (size) {
5384 case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5385 case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5386 case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5387 default: abort();
5390 tcg_temp_free_i32(tmp2);
5391 if (op < 8) {
5392 /* Accumulate. */
5393 tmp2 = neon_load_reg(rd, pass);
5394 switch (op) {
5395 case 0:
5396 gen_neon_add(size, tmp, tmp2);
5397 break;
5398 case 1:
5400 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5401 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5402 tcg_temp_free_ptr(fpstatus);
5403 break;
5405 case 4:
5406 gen_neon_rsb(size, tmp, tmp2);
5407 break;
5408 case 5:
5410 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5411 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5412 tcg_temp_free_ptr(fpstatus);
5413 break;
5415 default:
5416 abort();
5418 tcg_temp_free_i32(tmp2);
5420 neon_store_reg(rd, pass, tmp);
5422 break;
5423 case 3: /* VQDMLAL scalar */
5424 case 7: /* VQDMLSL scalar */
5425 case 11: /* VQDMULL scalar */
5426 if (u == 1) {
5427 return 1;
5429 /* fall through */
5430 case 2: /* VMLAL scalar */
5431 case 6: /* VMLSL scalar */
5432 case 10: /* VMULL scalar */
5433 if (rd & 1) {
5434 return 1;
5436 tmp2 = neon_get_scalar(size, rm);
5437 /* We need a copy of tmp2 because gen_neon_mull
5438 * deletes it during pass 0. */
5439 tmp4 = tcg_temp_new_i32();
5440 tcg_gen_mov_i32(tmp4, tmp2);
5441 tmp3 = neon_load_reg(rn, 1);
5443 for (pass = 0; pass < 2; pass++) {
5444 if (pass == 0) {
5445 tmp = neon_load_reg(rn, 0);
5446 } else {
5447 tmp = tmp3;
5448 tmp2 = tmp4;
5450 gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5451 if (op != 11) {
5452 neon_load_reg64(cpu_V1, rd + pass);
5454 switch (op) {
5455 case 6:
5456 gen_neon_negl(cpu_V0, size);
5457 /* Fall through */
5458 case 2:
5459 gen_neon_addl(size);
5460 break;
5461 case 3: case 7:
5462 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5463 if (op == 7) {
5464 gen_neon_negl(cpu_V0, size);
5466 gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5467 break;
5468 case 10:
5469 /* no-op */
5470 break;
5471 case 11:
5472 gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5473 break;
5474 default:
5475 abort();
5477 neon_store_reg64(cpu_V0, rd + pass);
5479 break;
5480 case 14: /* VQRDMLAH scalar */
5481 case 15: /* VQRDMLSH scalar */
5483 NeonGenThreeOpEnvFn *fn;
5485 if (!dc_isar_feature(aa32_rdm, s)) {
5486 return 1;
5488 if (u && ((rd | rn) & 1)) {
5489 return 1;
5491 if (op == 14) {
5492 if (size == 1) {
5493 fn = gen_helper_neon_qrdmlah_s16;
5494 } else {
5495 fn = gen_helper_neon_qrdmlah_s32;
5497 } else {
5498 if (size == 1) {
5499 fn = gen_helper_neon_qrdmlsh_s16;
5500 } else {
5501 fn = gen_helper_neon_qrdmlsh_s32;
5505 tmp2 = neon_get_scalar(size, rm);
5506 for (pass = 0; pass < (u ? 4 : 2); pass++) {
5507 tmp = neon_load_reg(rn, pass);
5508 tmp3 = neon_load_reg(rd, pass);
5509 fn(tmp, cpu_env, tmp, tmp2, tmp3);
5510 tcg_temp_free_i32(tmp3);
5511 neon_store_reg(rd, pass, tmp);
5513 tcg_temp_free_i32(tmp2);
5515 break;
5516 default:
5517 g_assert_not_reached();
5520 } else { /* size == 3 */
5521 if (!u) {
5522 /* Extract. */
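/*
 * VEXT: conceptually concatenates Vm:Vn (with Vn supplying the least
 * significant bytes) and extracts the full 8- or 16-byte result
 * starting at byte 'imm', implemented below with 64-bit shifts and
 * ORs across the one or two doubleword pairs.
 */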
5523 imm = (insn >> 8) & 0xf;
5525 if (imm > 7 && !q)
5526 return 1;
5528 if (q && ((rd | rn | rm) & 1)) {
5529 return 1;
5532 if (imm == 0) {
5533 neon_load_reg64(cpu_V0, rn);
5534 if (q) {
5535 neon_load_reg64(cpu_V1, rn + 1);
5537 } else if (imm == 8) {
5538 neon_load_reg64(cpu_V0, rn + 1);
5539 if (q) {
5540 neon_load_reg64(cpu_V1, rm);
5542 } else if (q) {
5543 tmp64 = tcg_temp_new_i64();
5544 if (imm < 8) {
5545 neon_load_reg64(cpu_V0, rn);
5546 neon_load_reg64(tmp64, rn + 1);
5547 } else {
5548 neon_load_reg64(cpu_V0, rn + 1);
5549 neon_load_reg64(tmp64, rm);
5551 tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
5552 tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
5553 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5554 if (imm < 8) {
5555 neon_load_reg64(cpu_V1, rm);
5556 } else {
5557 neon_load_reg64(cpu_V1, rm + 1);
5558 imm -= 8;
5560 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5561 tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
5562 tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
5563 tcg_temp_free_i64(tmp64);
5564 } else {
5565 /* BUGFIX */
5566 neon_load_reg64(cpu_V0, rn);
5567 tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
5568 neon_load_reg64(cpu_V1, rm);
5569 tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5570 tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5572 neon_store_reg64(cpu_V0, rd);
5573 if (q) {
5574 neon_store_reg64(cpu_V1, rd + 1);
5576 } else if ((insn & (1 << 11)) == 0) {
5577 /* Two register misc. */
5578 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
5579 size = (insn >> 18) & 3;
5580 /* UNDEF for unknown op values and bad op-size combinations */
5581 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
5582 return 1;
5584 if (neon_2rm_is_v8_op(op) &&
5585 !arm_dc_feature(s, ARM_FEATURE_V8)) {
5586 return 1;
5588 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
5589 q && ((rm | rd) & 1)) {
5590 return 1;
5592 switch (op) {
5593 case NEON_2RM_VREV64:
5594 for (pass = 0; pass < (q ? 2 : 1); pass++) {
5595 tmp = neon_load_reg(rm, pass * 2);
5596 tmp2 = neon_load_reg(rm, pass * 2 + 1);
5597 switch (size) {
5598 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5599 case 1: gen_swap_half(tmp); break;
5600 case 2: /* no-op */ break;
5601 default: abort();
5603 neon_store_reg(rd, pass * 2 + 1, tmp);
5604 if (size == 2) {
5605 neon_store_reg(rd, pass * 2, tmp2);
5606 } else {
5607 switch (size) {
5608 case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
5609 case 1: gen_swap_half(tmp2); break;
5610 default: abort();
5612 neon_store_reg(rd, pass * 2, tmp2);
5615 break;
5616 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
5617 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
5618 for (pass = 0; pass < q + 1; pass++) {
5619 tmp = neon_load_reg(rm, pass * 2);
5620 gen_neon_widen(cpu_V0, tmp, size, op & 1);
5621 tmp = neon_load_reg(rm, pass * 2 + 1);
5622 gen_neon_widen(cpu_V1, tmp, size, op & 1);
5623 switch (size) {
5624 case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
5625 case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
5626 case 2: tcg_gen_add_i64(CPU_V001); break;
5627 default: abort();
5629 if (op >= NEON_2RM_VPADAL) {
5630 /* Accumulate. */
5631 neon_load_reg64(cpu_V1, rd + pass);
5632 gen_neon_addl(size);
5634 neon_store_reg64(cpu_V0, rd + pass);
5636 break;
5637 case NEON_2RM_VTRN:
5638 if (size == 2) {
5639 int n;
5640 for (n = 0; n < (q ? 4 : 2); n += 2) {
5641 tmp = neon_load_reg(rm, n);
5642 tmp2 = neon_load_reg(rd, n + 1);
5643 neon_store_reg(rm, n, tmp2);
5644 neon_store_reg(rd, n + 1, tmp);
5646 } else {
5647 goto elementwise;
5649 break;
5650 case NEON_2RM_VUZP:
5651 if (gen_neon_unzip(rd, rm, size, q)) {
5652 return 1;
5654 break;
5655 case NEON_2RM_VZIP:
5656 if (gen_neon_zip(rd, rm, size, q)) {
5657 return 1;
5659 break;
5660 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
5661 /* also VQMOVUN; op field and mnemonics don't line up */
5662 if (rm & 1) {
5663 return 1;
5665 tmp2 = NULL;
5666 for (pass = 0; pass < 2; pass++) {
5667 neon_load_reg64(cpu_V0, rm + pass);
5668 tmp = tcg_temp_new_i32();
5669 gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
5670 tmp, cpu_V0);
5671 if (pass == 0) {
5672 tmp2 = tmp;
5673 } else {
5674 neon_store_reg(rd, 0, tmp2);
5675 neon_store_reg(rd, 1, tmp);
5678 break;
5679 case NEON_2RM_VSHLL:
5680 if (q || (rd & 1)) {
5681 return 1;
5683 tmp = neon_load_reg(rm, 0);
5684 tmp2 = neon_load_reg(rm, 1);
5685 for (pass = 0; pass < 2; pass++) {
5686 if (pass == 1)
5687 tmp = tmp2;
5688 gen_neon_widen(cpu_V0, tmp, size, 1);
5689 tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
5690 neon_store_reg64(cpu_V0, rd + pass);
5692 break;
5693 case NEON_2RM_VCVT_F16_F32:
5695 TCGv_ptr fpst;
5696 TCGv_i32 ahp;
5698 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
5699 q || (rm & 1)) {
5700 return 1;
5702 fpst = get_fpstatus_ptr(true);
5703 ahp = get_ahp_flag();
5704 tmp = neon_load_reg(rm, 0);
5705 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
5706 tmp2 = neon_load_reg(rm, 1);
5707 gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
5708 tcg_gen_shli_i32(tmp2, tmp2, 16);
5709 tcg_gen_or_i32(tmp2, tmp2, tmp);
5710 tcg_temp_free_i32(tmp);
5711 tmp = neon_load_reg(rm, 2);
5712 gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
5713 tmp3 = neon_load_reg(rm, 3);
5714 neon_store_reg(rd, 0, tmp2);
5715 gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
5716 tcg_gen_shli_i32(tmp3, tmp3, 16);
5717 tcg_gen_or_i32(tmp3, tmp3, tmp);
5718 neon_store_reg(rd, 1, tmp3);
5719 tcg_temp_free_i32(tmp);
5720 tcg_temp_free_i32(ahp);
5721 tcg_temp_free_ptr(fpst);
5722 break;
5724 case NEON_2RM_VCVT_F32_F16:
5726 TCGv_ptr fpst;
5727 TCGv_i32 ahp;
5728 if (!dc_isar_feature(aa32_fp16_spconv, s) ||
5729 q || (rd & 1)) {
5730 return 1;
5732 fpst = get_fpstatus_ptr(true);
5733 ahp = get_ahp_flag();
5734 tmp3 = tcg_temp_new_i32();
5735 tmp = neon_load_reg(rm, 0);
5736 tmp2 = neon_load_reg(rm, 1);
5737 tcg_gen_ext16u_i32(tmp3, tmp);
5738 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
5739 neon_store_reg(rd, 0, tmp3);
5740 tcg_gen_shri_i32(tmp, tmp, 16);
5741 gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
5742 neon_store_reg(rd, 1, tmp);
5743 tmp3 = tcg_temp_new_i32();
5744 tcg_gen_ext16u_i32(tmp3, tmp2);
5745 gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
5746 neon_store_reg(rd, 2, tmp3);
5747 tcg_gen_shri_i32(tmp2, tmp2, 16);
5748 gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
5749 neon_store_reg(rd, 3, tmp2);
5750 tcg_temp_free_i32(ahp);
5751 tcg_temp_free_ptr(fpst);
5752 break;
5754 case NEON_2RM_AESE: case NEON_2RM_AESMC:
5755 if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
5756 return 1;
5759 * Bit 6 is the lowest opcode bit; it distinguishes
5760 * between encryption (AESE/AESMC) and decryption
5761 * (AESD/AESIMC).
5763 if (op == NEON_2RM_AESE) {
5764 tcg_gen_gvec_3_ool(vfp_reg_offset(true, rd),
5765 vfp_reg_offset(true, rd),
5766 vfp_reg_offset(true, rm),
5767 16, 16, extract32(insn, 6, 1),
5768 gen_helper_crypto_aese);
5769 } else {
5770 tcg_gen_gvec_2_ool(vfp_reg_offset(true, rd),
5771 vfp_reg_offset(true, rm),
5772 16, 16, extract32(insn, 6, 1),
5773 gen_helper_crypto_aesmc);
5775 break;
5776 case NEON_2RM_SHA1H:
5777 if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
5778 return 1;
5780 tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
5781 gen_helper_crypto_sha1h);
5782 break;
5783 case NEON_2RM_SHA1SU1:
5784 if ((rm | rd) & 1) {
5785 return 1;
5787 /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
5788 if (q) {
5789 if (!dc_isar_feature(aa32_sha2, s)) {
5790 return 1;
5792 } else if (!dc_isar_feature(aa32_sha1, s)) {
5793 return 1;
5795 tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
5796 q ? gen_helper_crypto_sha256su0
5797 : gen_helper_crypto_sha1su1);
5798 break;
5799 case NEON_2RM_VMVN:
5800 tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
5801 break;
5802 case NEON_2RM_VNEG:
5803 tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
5804 break;
5805 case NEON_2RM_VABS:
5806 tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
5807 break;
5809 case NEON_2RM_VCEQ0:
5810 gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5811 break;
5812 case NEON_2RM_VCGT0:
5813 gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5814 break;
5815 case NEON_2RM_VCLE0:
5816 gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5817 break;
5818 case NEON_2RM_VCGE0:
5819 gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5820 break;
5821 case NEON_2RM_VCLT0:
5822 gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5823 break;
5825 default:
5826 elementwise:
5827 for (pass = 0; pass < (q ? 4 : 2); pass++) {
5828 tmp = neon_load_reg(rm, pass);
5829 switch (op) {
5830 case NEON_2RM_VREV32:
5831 switch (size) {
5832 case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5833 case 1: gen_swap_half(tmp); break;
5834 default: abort();
5836 break;
5837 case NEON_2RM_VREV16:
5838 gen_rev16(tmp, tmp);
5839 break;
5840 case NEON_2RM_VCLS:
5841 switch (size) {
5842 case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
5843 case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
5844 case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
5845 default: abort();
5847 break;
5848 case NEON_2RM_VCLZ:
5849 switch (size) {
5850 case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
5851 case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
5852 case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
5853 default: abort();
5855 break;
5856 case NEON_2RM_VCNT:
5857 gen_helper_neon_cnt_u8(tmp, tmp);
5858 break;
5859 case NEON_2RM_VQABS:
5860 switch (size) {
5861 case 0:
5862 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
5863 break;
5864 case 1:
5865 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
5866 break;
5867 case 2:
5868 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
5869 break;
5870 default: abort();
5872 break;
5873 case NEON_2RM_VQNEG:
5874 switch (size) {
5875 case 0:
5876 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
5877 break;
5878 case 1:
5879 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
5880 break;
5881 case 2:
5882 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
5883 break;
5884 default: abort();
5886 break;
5887 case NEON_2RM_VCGT0_F:
5889 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5890 tmp2 = tcg_const_i32(0);
5891 gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5892 tcg_temp_free_i32(tmp2);
5893 tcg_temp_free_ptr(fpstatus);
5894 break;
5896 case NEON_2RM_VCGE0_F:
5898 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5899 tmp2 = tcg_const_i32(0);
5900 gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5901 tcg_temp_free_i32(tmp2);
5902 tcg_temp_free_ptr(fpstatus);
5903 break;
5905 case NEON_2RM_VCEQ0_F:
5907 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5908 tmp2 = tcg_const_i32(0);
5909 gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5910 tcg_temp_free_i32(tmp2);
5911 tcg_temp_free_ptr(fpstatus);
5912 break;
5914 case NEON_2RM_VCLE0_F:
5916 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5917 tmp2 = tcg_const_i32(0);
5918 gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
5919 tcg_temp_free_i32(tmp2);
5920 tcg_temp_free_ptr(fpstatus);
5921 break;
5923 case NEON_2RM_VCLT0_F:
5925 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5926 tmp2 = tcg_const_i32(0);
5927 gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
5928 tcg_temp_free_i32(tmp2);
5929 tcg_temp_free_ptr(fpstatus);
5930 break;
5932 case NEON_2RM_VABS_F:
5933 gen_helper_vfp_abss(tmp, tmp);
5934 break;
5935 case NEON_2RM_VNEG_F:
5936 gen_helper_vfp_negs(tmp, tmp);
5937 break;
5938 case NEON_2RM_VSWP:
5939 tmp2 = neon_load_reg(rd, pass);
5940 neon_store_reg(rm, pass, tmp2);
5941 break;
5942 case NEON_2RM_VTRN:
5943 tmp2 = neon_load_reg(rd, pass);
5944 switch (size) {
5945 case 0: gen_neon_trn_u8(tmp, tmp2); break;
5946 case 1: gen_neon_trn_u16(tmp, tmp2); break;
5947 default: abort();
5949 neon_store_reg(rm, pass, tmp2);
5950 break;
5951 case NEON_2RM_VRINTN:
5952 case NEON_2RM_VRINTA:
5953 case NEON_2RM_VRINTM:
5954 case NEON_2RM_VRINTP:
5955 case NEON_2RM_VRINTZ:
5957 TCGv_i32 tcg_rmode;
5958 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5959 int rmode;
5961 if (op == NEON_2RM_VRINTZ) {
5962 rmode = FPROUNDING_ZERO;
5963 } else {
5964 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
5967 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5968 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5969 cpu_env);
5970 gen_helper_rints(tmp, tmp, fpstatus);
5971 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5972 cpu_env);
5973 tcg_temp_free_ptr(fpstatus);
5974 tcg_temp_free_i32(tcg_rmode);
5975 break;
5977 case NEON_2RM_VRINTX:
5979 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5980 gen_helper_rints_exact(tmp, tmp, fpstatus);
5981 tcg_temp_free_ptr(fpstatus);
5982 break;
5984 case NEON_2RM_VCVTAU:
5985 case NEON_2RM_VCVTAS:
5986 case NEON_2RM_VCVTNU:
5987 case NEON_2RM_VCVTNS:
5988 case NEON_2RM_VCVTPU:
5989 case NEON_2RM_VCVTPS:
5990 case NEON_2RM_VCVTMU:
5991 case NEON_2RM_VCVTMS:
5993 bool is_signed = !extract32(insn, 7, 1);
5994 TCGv_ptr fpst = get_fpstatus_ptr(1);
5995 TCGv_i32 tcg_rmode, tcg_shift;
5996 int rmode = fp_decode_rm[extract32(insn, 8, 2)];
5998 tcg_shift = tcg_const_i32(0);
5999 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6000 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6001 cpu_env);
6003 if (is_signed) {
6004 gen_helper_vfp_tosls(tmp, tmp,
6005 tcg_shift, fpst);
6006 } else {
6007 gen_helper_vfp_touls(tmp, tmp,
6008 tcg_shift, fpst);
6011 gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6012 cpu_env);
6013 tcg_temp_free_i32(tcg_rmode);
6014 tcg_temp_free_i32(tcg_shift);
6015 tcg_temp_free_ptr(fpst);
6016 break;
6018 case NEON_2RM_VRECPE:
6019 gen_helper_recpe_u32(tmp, tmp);
6020 break;
6021 case NEON_2RM_VRSQRTE:
6022 gen_helper_rsqrte_u32(tmp, tmp);
6023 break;
6024 case NEON_2RM_VRECPE_F:
6026 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6027 gen_helper_recpe_f32(tmp, tmp, fpstatus);
6028 tcg_temp_free_ptr(fpstatus);
6029 break;
6031 case NEON_2RM_VRSQRTE_F:
6033 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6034 gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6035 tcg_temp_free_ptr(fpstatus);
6036 break;
6038 case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
6040 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6041 gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6042 tcg_temp_free_ptr(fpstatus);
6043 break;
6045 case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
6047 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6048 gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6049 tcg_temp_free_ptr(fpstatus);
6050 break;
6052 case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6054 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6055 gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6056 tcg_temp_free_ptr(fpstatus);
6057 break;
6059 case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6061 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6062 gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6063 tcg_temp_free_ptr(fpstatus);
6064 break;
6066 default:
6067 /* Reserved op values were caught by the
6068 * neon_2rm_sizes[] check earlier.
6069 */
6070 abort();
6072 neon_store_reg(rd, pass, tmp);
6074 break;
6076 } else if ((insn & (1 << 10)) == 0) {
6077 /* VTBL, VTBX. */
6078 int n = ((insn >> 8) & 3) + 1;
6079 if ((rn + n) > 32) {
6080 /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6081 * helper function running off the end of the register file.
6082 */
6083 return 1;
6085 n <<= 3;
6086 if (insn & (1 << 6)) {
6087 tmp = neon_load_reg(rd, 0);
6088 } else {
6089 tmp = tcg_temp_new_i32();
6090 tcg_gen_movi_i32(tmp, 0);
6092 tmp2 = neon_load_reg(rm, 0);
6093 ptr1 = vfp_reg_ptr(true, rn);
6094 tmp5 = tcg_const_i32(n);
6095 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6096 tcg_temp_free_i32(tmp);
6097 if (insn & (1 << 6)) {
6098 tmp = neon_load_reg(rd, 1);
6099 } else {
6100 tmp = tcg_temp_new_i32();
6101 tcg_gen_movi_i32(tmp, 0);
6103 tmp3 = neon_load_reg(rm, 1);
6104 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6105 tcg_temp_free_i32(tmp5);
6106 tcg_temp_free_ptr(ptr1);
6107 neon_store_reg(rd, 0, tmp2);
6108 neon_store_reg(rd, 1, tmp3);
6109 tcg_temp_free_i32(tmp);
6110 } else if ((insn & 0x380) == 0) {
6111 /* VDUP */
6112 int element;
6113 MemOp size;
6115 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6116 return 1;
6118 if (insn & (1 << 16)) {
6119 size = MO_8;
6120 element = (insn >> 17) & 7;
6121 } else if (insn & (1 << 17)) {
6122 size = MO_16;
6123 element = (insn >> 18) & 3;
6124 } else {
6125 size = MO_32;
6126 element = (insn >> 19) & 1;
6128 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6129 neon_element_offset(rm, element, size),
6130 q ? 16 : 8, q ? 16 : 8);
6131 } else {
6132 return 1;
6136 return 0;
6139 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6141 int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6142 const ARMCPRegInfo *ri;
6144 cpnum = (insn >> 8) & 0xf;
6146 /* First check for coprocessor space used for XScale/iwMMXt insns */
6147 if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
6148 if (extract32(s->c15_cpar, cpnum, 1) == 0) {
6149 return 1;
6151 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6152 return disas_iwmmxt_insn(s, insn);
6153 } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6154 return disas_dsp_insn(s, insn);
6156 return 1;
6159 /* Otherwise treat as a generic register access */
6160 is64 = (insn & (1 << 25)) == 0;
6161 if (!is64 && ((insn & (1 << 4)) == 0)) {
6162 /* cdp */
6163 return 1;
6166 crm = insn & 0xf;
6167 if (is64) {
6168 crn = 0;
6169 opc1 = (insn >> 4) & 0xf;
6170 opc2 = 0;
6171 rt2 = (insn >> 16) & 0xf;
6172 } else {
6173 crn = (insn >> 16) & 0xf;
6174 opc1 = (insn >> 21) & 7;
6175 opc2 = (insn >> 5) & 7;
6176 rt2 = 0;
6178 isread = (insn >> 20) & 1;
6179 rt = (insn >> 12) & 0xf;
6181 ri = get_arm_cp_reginfo(s->cp_regs,
6182 ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
6183 if (ri) {
6184 bool need_exit_tb;
6186 /* Check access permissions */
6187 if (!cp_access_ok(s->current_el, ri, isread)) {
6188 return 1;
6191 if (s->hstr_active || ri->accessfn ||
6192 (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
6193 /* Emit code to perform further access permissions checks at
6194 * runtime; this may result in an exception.
6195 * Note that on XScale all cp0..c13 registers do an access check
6196 * call in order to handle c15_cpar.
6197 */
6198 TCGv_ptr tmpptr;
6199 TCGv_i32 tcg_syn, tcg_isread;
6200 uint32_t syndrome;
6202 /* Note that since we are an implementation which takes an
6203 * exception on a trapped conditional instruction only if the
6204 * instruction passes its condition code check, we can take
6205 * advantage of the clause in the ARM ARM that allows us to set
6206 * the COND field in the instruction to 0xE in all cases.
6207 * We could fish the actual condition out of the insn (ARM)
6208 * or the condexec bits (Thumb) but it isn't necessary.
6209 */
6210 switch (cpnum) {
6211 case 14:
6212 if (is64) {
6213 syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6214 isread, false);
6215 } else {
6216 syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6217 rt, isread, false);
6219 break;
6220 case 15:
6221 if (is64) {
6222 syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6223 isread, false);
6224 } else {
6225 syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6226 rt, isread, false);
6228 break;
6229 default:
6230 /* ARMv8 defines that only coprocessors 14 and 15 exist,
6231 * so this can only happen if this is an ARMv7 or earlier CPU,
6232 * in which case the syndrome information won't actually be
6233 * guest visible.
6234 */
6235 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
6236 syndrome = syn_uncategorized();
6237 break;
6240 gen_set_condexec(s);
6241 gen_set_pc_im(s, s->pc_curr);
6242 tmpptr = tcg_const_ptr(ri);
6243 tcg_syn = tcg_const_i32(syndrome);
6244 tcg_isread = tcg_const_i32(isread);
6245 gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
6246 tcg_isread);
6247 tcg_temp_free_ptr(tmpptr);
6248 tcg_temp_free_i32(tcg_syn);
6249 tcg_temp_free_i32(tcg_isread);
6250 } else if (ri->type & ARM_CP_RAISES_EXC) {
6251 /*
6252 * The readfn or writefn might raise an exception;
6253 * synchronize the CPU state in case it does.
6254 */
6255 gen_set_condexec(s);
6256 gen_set_pc_im(s, s->pc_curr);
6259 /* Handle special cases first */
6260 switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
6261 case ARM_CP_NOP:
6262 return 0;
6263 case ARM_CP_WFI:
6264 if (isread) {
6265 return 1;
6267 gen_set_pc_im(s, s->base.pc_next);
6268 s->base.is_jmp = DISAS_WFI;
6269 return 0;
6270 default:
6271 break;
6274 if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
6275 gen_io_start();
6278 if (isread) {
6279 /* Read */
6280 if (is64) {
6281 TCGv_i64 tmp64;
6282 TCGv_i32 tmp;
6283 if (ri->type & ARM_CP_CONST) {
6284 tmp64 = tcg_const_i64(ri->resetvalue);
6285 } else if (ri->readfn) {
6286 TCGv_ptr tmpptr;
6287 tmp64 = tcg_temp_new_i64();
6288 tmpptr = tcg_const_ptr(ri);
6289 gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
6290 tcg_temp_free_ptr(tmpptr);
6291 } else {
6292 tmp64 = tcg_temp_new_i64();
6293 tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
6295 tmp = tcg_temp_new_i32();
6296 tcg_gen_extrl_i64_i32(tmp, tmp64);
6297 store_reg(s, rt, tmp);
6298 tmp = tcg_temp_new_i32();
6299 tcg_gen_extrh_i64_i32(tmp, tmp64);
6300 tcg_temp_free_i64(tmp64);
6301 store_reg(s, rt2, tmp);
6302 } else {
6303 TCGv_i32 tmp;
6304 if (ri->type & ARM_CP_CONST) {
6305 tmp = tcg_const_i32(ri->resetvalue);
6306 } else if (ri->readfn) {
6307 TCGv_ptr tmpptr;
6308 tmp = tcg_temp_new_i32();
6309 tmpptr = tcg_const_ptr(ri);
6310 gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
6311 tcg_temp_free_ptr(tmpptr);
6312 } else {
6313 tmp = load_cpu_offset(ri->fieldoffset);
6315 if (rt == 15) {
6316 /* A destination register of r15 for 32-bit loads sets
6317 * the condition codes from the high 4 bits of the loaded value.
6318 */
6319 gen_set_nzcv(tmp);
6320 tcg_temp_free_i32(tmp);
6321 } else {
6322 store_reg(s, rt, tmp);
6325 } else {
6326 /* Write */
6327 if (ri->type & ARM_CP_CONST) {
6328 /* If not forbidden by access permissions, treat as WI */
6329 return 0;
6332 if (is64) {
6333 TCGv_i32 tmplo, tmphi;
6334 TCGv_i64 tmp64 = tcg_temp_new_i64();
6335 tmplo = load_reg(s, rt);
6336 tmphi = load_reg(s, rt2);
6337 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
6338 tcg_temp_free_i32(tmplo);
6339 tcg_temp_free_i32(tmphi);
6340 if (ri->writefn) {
6341 TCGv_ptr tmpptr = tcg_const_ptr(ri);
6342 gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
6343 tcg_temp_free_ptr(tmpptr);
6344 } else {
6345 tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
6347 tcg_temp_free_i64(tmp64);
6348 } else {
6349 if (ri->writefn) {
6350 TCGv_i32 tmp;
6351 TCGv_ptr tmpptr;
6352 tmp = load_reg(s, rt);
6353 tmpptr = tcg_const_ptr(ri);
6354 gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
6355 tcg_temp_free_ptr(tmpptr);
6356 tcg_temp_free_i32(tmp);
6357 } else {
6358 TCGv_i32 tmp = load_reg(s, rt);
6359 store_cpu_offset(tmp, ri->fieldoffset);
6364 /* I/O operations must end the TB here (whether read or write) */
6365 need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
6366 (ri->type & ARM_CP_IO));
6368 if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
6369 /*
6370 * A write to any coprocessor register that ends a TB
6371 * must rebuild the hflags for the next TB.
6372 */
6373 TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
6374 if (arm_dc_feature(s, ARM_FEATURE_M)) {
6375 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
6376 } else {
6377 if (ri->type & ARM_CP_NEWEL) {
6378 gen_helper_rebuild_hflags_a32_newel(cpu_env);
6379 } else {
6380 gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
6383 tcg_temp_free_i32(tcg_el);
6384 /*
6385 * We default to ending the TB on a coprocessor register write,
6386 * but allow this to be suppressed by the register definition
6387 * (usually only necessary to work around guest bugs).
6388 */
6389 need_exit_tb = true;
6391 if (need_exit_tb) {
6392 gen_lookup_tb(s);
6395 return 0;
6398 /* Unknown register; this might be a guest error or a QEMU
6399 * unimplemented feature.
6400 */
6401 if (is64) {
6402 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
6403 "64 bit system register cp:%d opc1: %d crm:%d "
6404 "(%s)\n",
6405 isread ? "read" : "write", cpnum, opc1, crm,
6406 s->ns ? "non-secure" : "secure");
6407 } else {
6408 qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
6409 "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
6410 "(%s)\n",
6411 isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
6412 s->ns ? "non-secure" : "secure");
6415 return 1;
6419 /* Store a 64-bit value to a register pair. Clobbers val. */
6420 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
6422 TCGv_i32 tmp;
6423 tmp = tcg_temp_new_i32();
6424 tcg_gen_extrl_i64_i32(tmp, val);
6425 store_reg(s, rlow, tmp);
6426 tmp = tcg_temp_new_i32();
6427 tcg_gen_extrh_i64_i32(tmp, val);
6428 store_reg(s, rhigh, tmp);
6431 /* load and add a 64-bit value from a register pair. */
6432 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
6434 TCGv_i64 tmp;
6435 TCGv_i32 tmpl;
6436 TCGv_i32 tmph;
6438 /* Load the 64-bit value rhigh:rlow. */
6439 tmpl = load_reg(s, rlow);
6440 tmph = load_reg(s, rhigh);
6441 tmp = tcg_temp_new_i64();
6442 tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
6443 tcg_temp_free_i32(tmpl);
6444 tcg_temp_free_i32(tmph);
6445 tcg_gen_add_i64(val, val, tmp);
6446 tcg_temp_free_i64(tmp);
6449 /* Set N and Z flags from hi|lo. */
6450 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
6452 tcg_gen_mov_i32(cpu_NF, hi);
6453 tcg_gen_or_i32(cpu_ZF, lo, hi);
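/*
 * Note on the flag representation used above: cpu_ZF holds a value that is
 * zero exactly when Z is set, so ORing both halves makes Z reflect the full
 * 64-bit result; cpu_NF takes the sign from bit 31 of the high half.
 */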
6456 /* Load/Store exclusive instructions are implemented by remembering
6457 the value/address loaded, and seeing if these are the same
6458 when the store is performed. This should be sufficient to implement
6459 the architecturally mandated semantics, and avoids having to monitor
6460 regular stores. The compare vs the remembered value is done during
6461 the cmpxchg operation, but we must compare the addresses manually. */
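/*
 * Illustrative sketch of the scheme above for a 32-bit LDREX/STREX pair
 * (intended semantics, not the generated TCG):
 *
 *   LDREX:  exclusive_addr = addr; exclusive_val = [addr]; Rt = [addr];
 *   STREX:  if (addr == exclusive_addr
 *               && cmpxchg([addr], exclusive_val, Rt) succeeded) {
 *               Rd = 0;
 *           } else {
 *               Rd = 1;
 *           }
 *           exclusive_addr = -1;   (monitor cleared either way)
 */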
6462 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
6463 TCGv_i32 addr, int size)
6465 TCGv_i32 tmp = tcg_temp_new_i32();
6466 MemOp opc = size | MO_ALIGN | s->be_data;
6468 s->is_ldex = true;
6470 if (size == 3) {
6471 TCGv_i32 tmp2 = tcg_temp_new_i32();
6472 TCGv_i64 t64 = tcg_temp_new_i64();
6474 /* For AArch32, architecturally the 32-bit word at the lowest
6475 * address is always Rt and the one at addr+4 is Rt2, even if
6476 * the CPU is big-endian. That means we don't want to do a
6477 * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
6478 * for an architecturally 64-bit access, but instead do a
6479 * 64-bit access using MO_BE if appropriate and then split
6480 * the two halves.
6481 * This only makes a difference for BE32 user-mode, where
6482 * frob64() must not flip the two halves of the 64-bit data
6483 * but this code must treat BE32 user-mode like BE32 system.
6484 */
6485 TCGv taddr = gen_aa32_addr(s, addr, opc);
6487 tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
6488 tcg_temp_free(taddr);
6489 tcg_gen_mov_i64(cpu_exclusive_val, t64);
6490 if (s->be_data == MO_BE) {
6491 tcg_gen_extr_i64_i32(tmp2, tmp, t64);
6492 } else {
6493 tcg_gen_extr_i64_i32(tmp, tmp2, t64);
6495 tcg_temp_free_i64(t64);
6497 store_reg(s, rt2, tmp2);
6498 } else {
6499 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
6500 tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
6503 store_reg(s, rt, tmp);
6504 tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
6507 static void gen_clrex(DisasContext *s)
6509 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
6512 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
6513 TCGv_i32 addr, int size)
6515 TCGv_i32 t0, t1, t2;
6516 TCGv_i64 extaddr;
6517 TCGv taddr;
6518 TCGLabel *done_label;
6519 TCGLabel *fail_label;
6520 MemOp opc = size | MO_ALIGN | s->be_data;
6522 /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
6523 [addr] = {Rt};
6524 {Rd} = 0;
6525 } else {
6526 {Rd} = 1;
6527 } */
6528 fail_label = gen_new_label();
6529 done_label = gen_new_label();
6530 extaddr = tcg_temp_new_i64();
6531 tcg_gen_extu_i32_i64(extaddr, addr);
6532 tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
6533 tcg_temp_free_i64(extaddr);
6535 taddr = gen_aa32_addr(s, addr, opc);
6536 t0 = tcg_temp_new_i32();
6537 t1 = load_reg(s, rt);
6538 if (size == 3) {
6539 TCGv_i64 o64 = tcg_temp_new_i64();
6540 TCGv_i64 n64 = tcg_temp_new_i64();
6542 t2 = load_reg(s, rt2);
6543 /* For AArch32, architecturally the 32-bit word at the lowest
6544 * address is always Rt and the one at addr+4 is Rt2, even if
6545 * the CPU is big-endian. Since we're going to treat this as a
6546 * single 64-bit BE store, we need to put the two halves in the
6547 * opposite order for BE to LE, so that they end up in the right
6548 * places.
6549 * We don't want gen_aa32_frob64() because that does the wrong
6550 * thing for BE32 usermode.
6551 */
6552 if (s->be_data == MO_BE) {
6553 tcg_gen_concat_i32_i64(n64, t2, t1);
6554 } else {
6555 tcg_gen_concat_i32_i64(n64, t1, t2);
6557 tcg_temp_free_i32(t2);
6559 tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
6560 get_mem_index(s), opc);
6561 tcg_temp_free_i64(n64);
6563 tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
6564 tcg_gen_extrl_i64_i32(t0, o64);
6566 tcg_temp_free_i64(o64);
6567 } else {
6568 t2 = tcg_temp_new_i32();
6569 tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
6570 tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
6571 tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
6572 tcg_temp_free_i32(t2);
6574 tcg_temp_free_i32(t1);
6575 tcg_temp_free(taddr);
6576 tcg_gen_mov_i32(cpu_R[rd], t0);
6577 tcg_temp_free_i32(t0);
6578 tcg_gen_br(done_label);
6580 gen_set_label(fail_label);
6581 tcg_gen_movi_i32(cpu_R[rd], 1);
6582 gen_set_label(done_label);
6583 tcg_gen_movi_i64(cpu_exclusive_addr, -1);
6586 /* gen_srs:
6587 * @env: CPUARMState
6588 * @s: DisasContext
6589 * @mode: mode field from insn (which stack to store to)
6590 * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
6591 * @writeback: true if writeback bit set
6593 * Generate code for the SRS (Store Return State) insn.
6594 */
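/*
 * Summary of the addresses generated below (SP is the banked r13 selected
 * by 'mode'): DA stores LR at SP-4 and the SPSR at SP; IA at SP and SP+4;
 * DB at SP-8 and SP-4; IB at SP+4 and SP+8. With writeback the final SP
 * is SP-8 for DA/DB and SP+8 for IA/IB.
 */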
6595 static void gen_srs(DisasContext *s,
6596 uint32_t mode, uint32_t amode, bool writeback)
6598 int32_t offset;
6599 TCGv_i32 addr, tmp;
6600 bool undef = false;
6602 /* SRS is:
6603 * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
6604 * and specified mode is monitor mode
6605 * - UNDEFINED in Hyp mode
6606 * - UNPREDICTABLE in User or System mode
6607 * - UNPREDICTABLE if the specified mode is:
6608 * -- not implemented
6609 * -- not a valid mode number
6610 * -- a mode that's at a higher exception level
6611 * -- Monitor, if we are Non-secure
6612 * For the UNPREDICTABLE cases we choose to UNDEF.
6613 */
6614 if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
6615 gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
6616 return;
6619 if (s->current_el == 0 || s->current_el == 2) {
6620 undef = true;
6623 switch (mode) {
6624 case ARM_CPU_MODE_USR:
6625 case ARM_CPU_MODE_FIQ:
6626 case ARM_CPU_MODE_IRQ:
6627 case ARM_CPU_MODE_SVC:
6628 case ARM_CPU_MODE_ABT:
6629 case ARM_CPU_MODE_UND:
6630 case ARM_CPU_MODE_SYS:
6631 break;
6632 case ARM_CPU_MODE_HYP:
6633 if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
6634 undef = true;
6636 break;
6637 case ARM_CPU_MODE_MON:
6638 /* No need to check specifically for "are we non-secure" because
6639 * we've already made EL0 UNDEF and handled the trap for S-EL1;
6640 * so if this isn't EL3 then we must be non-secure.
6641 */
6642 if (s->current_el != 3) {
6643 undef = true;
6645 break;
6646 default:
6647 undef = true;
6650 if (undef) {
6651 unallocated_encoding(s);
6652 return;
6655 addr = tcg_temp_new_i32();
6656 tmp = tcg_const_i32(mode);
6657 /* get_r13_banked() will raise an exception if called from System mode */
6658 gen_set_condexec(s);
6659 gen_set_pc_im(s, s->pc_curr);
6660 gen_helper_get_r13_banked(addr, cpu_env, tmp);
6661 tcg_temp_free_i32(tmp);
6662 switch (amode) {
6663 case 0: /* DA */
6664 offset = -4;
6665 break;
6666 case 1: /* IA */
6667 offset = 0;
6668 break;
6669 case 2: /* DB */
6670 offset = -8;
6671 break;
6672 case 3: /* IB */
6673 offset = 4;
6674 break;
6675 default:
6676 abort();
6678 tcg_gen_addi_i32(addr, addr, offset);
6679 tmp = load_reg(s, 14);
6680 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
6681 tcg_temp_free_i32(tmp);
6682 tmp = load_cpu_field(spsr);
6683 tcg_gen_addi_i32(addr, addr, 4);
6684 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
6685 tcg_temp_free_i32(tmp);
6686 if (writeback) {
6687 switch (amode) {
6688 case 0:
6689 offset = -8;
6690 break;
6691 case 1:
6692 offset = 4;
6693 break;
6694 case 2:
6695 offset = -4;
6696 break;
6697 case 3:
6698 offset = 0;
6699 break;
6700 default:
6701 abort();
6703 tcg_gen_addi_i32(addr, addr, offset);
6704 tmp = tcg_const_i32(mode);
6705 gen_helper_set_r13_banked(cpu_env, tmp, addr);
6706 tcg_temp_free_i32(tmp);
6708 tcg_temp_free_i32(addr);
6709 s->base.is_jmp = DISAS_UPDATE;
6712 /* Generate a label used for skipping this instruction */
6713 static void arm_gen_condlabel(DisasContext *s)
6715 if (!s->condjmp) {
6716 s->condlabel = gen_new_label();
6717 s->condjmp = 1;
6721 /* Skip this instruction if the ARM condition is false */
6722 static void arm_skip_unless(DisasContext *s, uint32_t cond)
6724 arm_gen_condlabel(s);
6725 arm_gen_test_cc(cond ^ 1, s->condlabel);
6730 * Constant expanders for the decoders.
6733 static int negate(DisasContext *s, int x)
6735 return -x;
6738 static int plus_2(DisasContext *s, int x)
6740 return x + 2;
6743 static int times_2(DisasContext *s, int x)
6745 return x * 2;
6748 static int times_4(DisasContext *s, int x)
6750 return x * 4;
6753 /* Return only the rotation part of T32ExpandImm. */
6754 static int t32_expandimm_rot(DisasContext *s, int x)
6756 return x & 0xc00 ? extract32(x, 7, 5) : 0;
6759 /* Return the unrotated immediate from T32ExpandImm. */
6760 static int t32_expandimm_imm(DisasContext *s, int x)
6762 int imm = extract32(x, 0, 8);
6764 switch (extract32(x, 8, 4)) {
6765 case 0: /* XY */
6766 /* Nothing to do. */
6767 break;
6768 case 1: /* 00XY00XY */
6769 imm *= 0x00010001;
6770 break;
6771 case 2: /* XY00XY00 */
6772 imm *= 0x01000100;
6773 break;
6774 case 3: /* XYXYXYXY */
6775 imm *= 0x01010101;
6776 break;
6777 default:
6778 /* Rotated constant. */
6779 imm |= 0x80;
6780 break;
6782 return imm;
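/*
 * Worked example of the two expanders above (the rotation is applied
 * later, by ror32() in op_s_rri_rot()/op_s_rxi_rot()):
 *   imm12 = 0x14a: rot = 0,  imm = 0x4a * 0x00010001          -> 0x004a004a
 *   imm12 = 0x92b: rot = 18, imm = 0x2b | 0x80 = 0xab;
 *                  ror32(0xab, 18) gives the final constant 0x002ac000
 */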
6785 static int t32_branch24(DisasContext *s, int x)
6787 /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. */
6788 x ^= !(x < 0) * (3 << 21);
6789 /* Append the final zero. */
6790 return x << 1;
6793 static int t16_setflags(DisasContext *s)
6795 return s->condexec_mask == 0;
6798 static int t16_push_list(DisasContext *s, int x)
6800 return (x & 0xff) | (x & 0x100) << (14 - 8);
6803 static int t16_pop_list(DisasContext *s, int x)
6805 return (x & 0xff) | (x & 0x100) << (15 - 8);
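/*
 * Example: a register_list of 0x101 (r0 plus the extra bit 8) expands to
 * 0x4001 ({r0, lr}) for PUSH and 0x8001 ({r0, pc}) for POP.
 */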
6808 /*
6809 * Include the generated decoders.
6810 */
6812 #include "decode-a32.inc.c"
6813 #include "decode-a32-uncond.inc.c"
6814 #include "decode-t32.inc.c"
6815 #include "decode-t16.inc.c"
6817 /* Helpers to swap operands for reverse-subtract. */
6818 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
6820 tcg_gen_sub_i32(dst, b, a);
6823 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
6825 gen_sub_CC(dst, b, a);
6828 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
6830 gen_sub_carry(dest, b, a);
6833 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
6835 gen_sbc_CC(dest, b, a);
6838 /*
6839 * Helpers for the data processing routines.
6840 *
6841 * After the computation store the results back.
6842 * This may be suppressed altogether (STREG_NONE), require a runtime
6843 * check against the stack limits (STREG_SP_CHECK), or generate an
6844 * exception return. Oh, or store into a register.
6845 *
6846 * Always return true, indicating success for a trans_* function.
6847 */
6848 typedef enum {
6849 STREG_NONE,
6850 STREG_NORMAL,
6851 STREG_SP_CHECK,
6852 STREG_EXC_RET,
6853 } StoreRegKind;
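/*
 * For example, the ADD expansion below uses STREG_SP_CHECK when both Rd
 * and Rn are SP (so the v8M stack-limit check runs), SUB/MOV switch to
 * STREG_EXC_RET for the "SUBS PC, LR"-style exception-return forms, and
 * the comparison ops (TST/TEQ/CMN/CMP) discard the result via STREG_NONE.
 */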
6855 static bool store_reg_kind(DisasContext *s, int rd,
6856 TCGv_i32 val, StoreRegKind kind)
6858 switch (kind) {
6859 case STREG_NONE:
6860 tcg_temp_free_i32(val);
6861 return true;
6862 case STREG_NORMAL:
6863 /* See ALUWritePC: Interworking only from a32 mode. */
6864 if (s->thumb) {
6865 store_reg(s, rd, val);
6866 } else {
6867 store_reg_bx(s, rd, val);
6869 return true;
6870 case STREG_SP_CHECK:
6871 store_sp_checked(s, val);
6872 return true;
6873 case STREG_EXC_RET:
6874 gen_exception_return(s, val);
6875 return true;
6877 g_assert_not_reached();
6881 * Data Processing (register)
6883 * Operate, with set flags, one register source,
6884 * one immediate shifted register source, and a destination.
6886 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
6887 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6888 int logic_cc, StoreRegKind kind)
6890 TCGv_i32 tmp1, tmp2;
6892 tmp2 = load_reg(s, a->rm);
6893 gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
6894 tmp1 = load_reg(s, a->rn);
6896 gen(tmp1, tmp1, tmp2);
6897 tcg_temp_free_i32(tmp2);
6899 if (logic_cc) {
6900 gen_logic_CC(tmp1);
6902 return store_reg_kind(s, a->rd, tmp1, kind);
6905 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
6906 void (*gen)(TCGv_i32, TCGv_i32),
6907 int logic_cc, StoreRegKind kind)
6909 TCGv_i32 tmp;
6911 tmp = load_reg(s, a->rm);
6912 gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
6914 gen(tmp, tmp);
6915 if (logic_cc) {
6916 gen_logic_CC(tmp);
6918 return store_reg_kind(s, a->rd, tmp, kind);
6922 * Data-processing (register-shifted register)
6924 * Operate, with set flags, one register source,
6925 * one register shifted register source, and a destination.
6927 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
6928 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6929 int logic_cc, StoreRegKind kind)
6931 TCGv_i32 tmp1, tmp2;
6933 tmp1 = load_reg(s, a->rs);
6934 tmp2 = load_reg(s, a->rm);
6935 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
6936 tmp1 = load_reg(s, a->rn);
6938 gen(tmp1, tmp1, tmp2);
6939 tcg_temp_free_i32(tmp2);
6941 if (logic_cc) {
6942 gen_logic_CC(tmp1);
6944 return store_reg_kind(s, a->rd, tmp1, kind);
6947 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
6948 void (*gen)(TCGv_i32, TCGv_i32),
6949 int logic_cc, StoreRegKind kind)
6951 TCGv_i32 tmp1, tmp2;
6953 tmp1 = load_reg(s, a->rs);
6954 tmp2 = load_reg(s, a->rm);
6955 gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
6957 gen(tmp2, tmp2);
6958 if (logic_cc) {
6959 gen_logic_CC(tmp2);
6961 return store_reg_kind(s, a->rd, tmp2, kind);
6964 /*
6965 * Data-processing (immediate)
6967 * Operate, with set flags, one register source,
6968 * one rotated immediate, and a destination.
6970 * Note that logic_cc && a->rot setting CF based on the msb of the
6971 * immediate is the reason why we must pass in the unrotated form
6972 * of the immediate.
6973 */
6974 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
6975 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6976 int logic_cc, StoreRegKind kind)
6978 TCGv_i32 tmp1, tmp2;
6979 uint32_t imm;
6981 imm = ror32(a->imm, a->rot);
6982 if (logic_cc && a->rot) {
6983 tcg_gen_movi_i32(cpu_CF, imm >> 31);
6985 tmp2 = tcg_const_i32(imm);
6986 tmp1 = load_reg(s, a->rn);
6988 gen(tmp1, tmp1, tmp2);
6989 tcg_temp_free_i32(tmp2);
6991 if (logic_cc) {
6992 gen_logic_CC(tmp1);
6994 return store_reg_kind(s, a->rd, tmp1, kind);
6997 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
6998 void (*gen)(TCGv_i32, TCGv_i32),
6999 int logic_cc, StoreRegKind kind)
7001 TCGv_i32 tmp;
7002 uint32_t imm;
7004 imm = ror32(a->imm, a->rot);
7005 if (logic_cc && a->rot) {
7006 tcg_gen_movi_i32(cpu_CF, imm >> 31);
7008 tmp = tcg_const_i32(imm);
7010 gen(tmp, tmp);
7011 if (logic_cc) {
7012 gen_logic_CC(tmp);
7014 return store_reg_kind(s, a->rd, tmp, kind);
7017 #define DO_ANY3(NAME, OP, L, K) \
7018 static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a) \
7019 { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); } \
7020 static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a) \
7021 { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); } \
7022 static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a) \
7023 { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7025 #define DO_ANY2(NAME, OP, L, K) \
7026 static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a) \
7027 { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); } \
7028 static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a) \
7029 { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); } \
7030 static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a) \
7031 { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7033 #define DO_CMP2(NAME, OP, L) \
7034 static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a) \
7035 { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); } \
7036 static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a) \
7037 { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); } \
7038 static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a) \
7039 { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
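/*
 * For example, DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL) expands
 * to trans_AND_rrri, trans_AND_rrrr and trans_AND_rri, each forwarding to
 * the matching op_s_* helper above with tcg_gen_and_i32 as the operation.
 */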
7041 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7042 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7043 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7044 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7046 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7047 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7048 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7049 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7051 DO_CMP2(TST, tcg_gen_and_i32, true)
7052 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7053 DO_CMP2(CMN, gen_add_CC, false)
7054 DO_CMP2(CMP, gen_sub_CC, false)
7056 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7057 a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7059 /*
7060 * Note for the computation of StoreRegKind we return out of the
7061 * middle of the functions that are expanded by DO_ANY3, and that
7062 * we modify a->s via that parameter before it is used by OP.
7063 */
7064 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7066 StoreRegKind ret = STREG_NORMAL;
7067 if (a->rd == 15 && a->s) {
7069 * See ALUExceptionReturn:
7070 * In User mode, UNPREDICTABLE; we choose UNDEF.
7071 * In Hyp mode, UNDEFINED.
7073 if (IS_USER(s) || s->current_el == 2) {
7074 unallocated_encoding(s);
7075 return true;
7077 /* There is no writeback of nzcv to PSTATE. */
7078 a->s = 0;
7079 ret = STREG_EXC_RET;
7080 } else if (a->rd == 13 && a->rn == 13) {
7081 ret = STREG_SP_CHECK;
7083 ret;
7086 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7088 StoreRegKind ret = STREG_NORMAL;
7089 if (a->rd == 15 && a->s) {
7091 * See ALUExceptionReturn:
7092 * In User mode, UNPREDICTABLE; we choose UNDEF.
7093 * In Hyp mode, UNDEFINED.
7095 if (IS_USER(s) || s->current_el == 2) {
7096 unallocated_encoding(s);
7097 return true;
7099 /* There is no writeback of nzcv to PSTATE. */
7100 a->s = 0;
7101 ret = STREG_EXC_RET;
7102 } else if (a->rd == 13) {
7103 ret = STREG_SP_CHECK;
7105 ret;
7108 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7111 * ORN is only available with T32, so there is no register-shifted-register
7112 * form of the insn. Using the DO_ANY3 macro would create an unused function.
7114 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7116 return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7119 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7121 return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7124 #undef DO_ANY3
7125 #undef DO_ANY2
7126 #undef DO_CMP2
7128 static bool trans_ADR(DisasContext *s, arg_ri *a)
7130 store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7131 return true;
7134 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7136 TCGv_i32 tmp;
7138 if (!ENABLE_ARCH_6T2) {
7139 return false;
7142 tmp = tcg_const_i32(a->imm);
7143 store_reg(s, a->rd, tmp);
7144 return true;
7147 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
7149 TCGv_i32 tmp;
7151 if (!ENABLE_ARCH_6T2) {
7152 return false;
7155 tmp = load_reg(s, a->rd);
7156 tcg_gen_ext16u_i32(tmp, tmp);
7157 tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
7158 store_reg(s, a->rd, tmp);
7159 return true;
7163 * Multiply and multiply accumulate
7166 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
7168 TCGv_i32 t1, t2;
7170 t1 = load_reg(s, a->rn);
7171 t2 = load_reg(s, a->rm);
7172 tcg_gen_mul_i32(t1, t1, t2);
7173 tcg_temp_free_i32(t2);
7174 if (add) {
7175 t2 = load_reg(s, a->ra);
7176 tcg_gen_add_i32(t1, t1, t2);
7177 tcg_temp_free_i32(t2);
7179 if (a->s) {
7180 gen_logic_CC(t1);
7182 store_reg(s, a->rd, t1);
7183 return true;
7186 static bool trans_MUL(DisasContext *s, arg_MUL *a)
7188 return op_mla(s, a, false);
7191 static bool trans_MLA(DisasContext *s, arg_MLA *a)
7193 return op_mla(s, a, true);
7196 static bool trans_MLS(DisasContext *s, arg_MLS *a)
7198 TCGv_i32 t1, t2;
7200 if (!ENABLE_ARCH_6T2) {
7201 return false;
7203 t1 = load_reg(s, a->rn);
7204 t2 = load_reg(s, a->rm);
7205 tcg_gen_mul_i32(t1, t1, t2);
7206 tcg_temp_free_i32(t2);
7207 t2 = load_reg(s, a->ra);
7208 tcg_gen_sub_i32(t1, t2, t1);
7209 tcg_temp_free_i32(t2);
7210 store_reg(s, a->rd, t1);
7211 return true;
7214 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
7216 TCGv_i32 t0, t1, t2, t3;
7218 t0 = load_reg(s, a->rm);
7219 t1 = load_reg(s, a->rn);
7220 if (uns) {
7221 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7222 } else {
7223 tcg_gen_muls2_i32(t0, t1, t0, t1);
7225 if (add) {
7226 t2 = load_reg(s, a->ra);
7227 t3 = load_reg(s, a->rd);
7228 tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
7229 tcg_temp_free_i32(t2);
7230 tcg_temp_free_i32(t3);
7232 if (a->s) {
7233 gen_logicq_cc(t0, t1);
7235 store_reg(s, a->ra, t0);
7236 store_reg(s, a->rd, t1);
7237 return true;
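/*
 * t0/t1 above are the low/high halves of the product; the decode supplies
 * RdLo in a->ra and RdHi in a->rd, hence the two store_reg() calls.
 */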
7240 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
7242 return op_mlal(s, a, true, false);
7245 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
7247 return op_mlal(s, a, false, false);
7250 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
7252 return op_mlal(s, a, true, true);
7255 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
7257 return op_mlal(s, a, false, true);
7260 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
7262 TCGv_i32 t0, t1, t2, zero;
7264 if (s->thumb
7265 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7266 : !ENABLE_ARCH_6) {
7267 return false;
7270 t0 = load_reg(s, a->rm);
7271 t1 = load_reg(s, a->rn);
7272 tcg_gen_mulu2_i32(t0, t1, t0, t1);
7273 zero = tcg_const_i32(0);
7274 t2 = load_reg(s, a->ra);
7275 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7276 tcg_temp_free_i32(t2);
7277 t2 = load_reg(s, a->rd);
7278 tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7279 tcg_temp_free_i32(t2);
7280 tcg_temp_free_i32(zero);
7281 store_reg(s, a->ra, t0);
7282 store_reg(s, a->rd, t1);
7283 return true;
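/*
 * UMAAL computes Rm*Rn + Ra + Rd as a 64-bit value; since
 * 0xffffffff * 0xffffffff + 2 * 0xffffffff == 0xffffffffffffffff,
 * the two add2 steps above can never carry out of 64 bits.
 */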
7287 * Saturating addition and subtraction
7290 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
7292 TCGv_i32 t0, t1;
7294 if (s->thumb
7295 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7296 : !ENABLE_ARCH_5TE) {
7297 return false;
7300 t0 = load_reg(s, a->rm);
7301 t1 = load_reg(s, a->rn);
7302 if (doub) {
7303 gen_helper_add_saturate(t1, cpu_env, t1, t1);
7305 if (add) {
7306 gen_helper_add_saturate(t0, cpu_env, t0, t1);
7307 } else {
7308 gen_helper_sub_saturate(t0, cpu_env, t0, t1);
7310 tcg_temp_free_i32(t1);
7311 store_reg(s, a->rd, t0);
7312 return true;
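/*
 * For QDADD/QDSUB the doubling of Rn is itself done with the saturating
 * helper, so either the doubling or the final add/subtract can set Q.
 */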
7315 #define DO_QADDSUB(NAME, ADD, DOUB) \
7316 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7318 return op_qaddsub(s, a, ADD, DOUB); \
7321 DO_QADDSUB(QADD, true, false)
7322 DO_QADDSUB(QSUB, false, false)
7323 DO_QADDSUB(QDADD, true, true)
7324 DO_QADDSUB(QDSUB, false, true)
7326 #undef DO_QADDSUB
7329 * Halfword multiply and multiply accumulate
7332 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
7333 int add_long, bool nt, bool mt)
7335 TCGv_i32 t0, t1, tl, th;
7337 if (s->thumb
7338 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7339 : !ENABLE_ARCH_5TE) {
7340 return false;
7343 t0 = load_reg(s, a->rn);
7344 t1 = load_reg(s, a->rm);
7345 gen_mulxy(t0, t1, nt, mt);
7346 tcg_temp_free_i32(t1);
7348 switch (add_long) {
7349 case 0:
7350 store_reg(s, a->rd, t0);
7351 break;
7352 case 1:
7353 t1 = load_reg(s, a->ra);
7354 gen_helper_add_setq(t0, cpu_env, t0, t1);
7355 tcg_temp_free_i32(t1);
7356 store_reg(s, a->rd, t0);
7357 break;
7358 case 2:
7359 tl = load_reg(s, a->ra);
7360 th = load_reg(s, a->rd);
7361 /* Sign-extend the 32-bit product to 64 bits. */
7362 t1 = tcg_temp_new_i32();
7363 tcg_gen_sari_i32(t1, t0, 31);
7364 tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
7365 tcg_temp_free_i32(t0);
7366 tcg_temp_free_i32(t1);
7367 store_reg(s, a->ra, tl);
7368 store_reg(s, a->rd, th);
7369 break;
7370 default:
7371 g_assert_not_reached();
7373 return true;
7376 #define DO_SMLAX(NAME, add, nt, mt) \
7377 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
7379 return op_smlaxxx(s, a, add, nt, mt); \
7382 DO_SMLAX(SMULBB, 0, 0, 0)
7383 DO_SMLAX(SMULBT, 0, 0, 1)
7384 DO_SMLAX(SMULTB, 0, 1, 0)
7385 DO_SMLAX(SMULTT, 0, 1, 1)
7387 DO_SMLAX(SMLABB, 1, 0, 0)
7388 DO_SMLAX(SMLABT, 1, 0, 1)
7389 DO_SMLAX(SMLATB, 1, 1, 0)
7390 DO_SMLAX(SMLATT, 1, 1, 1)
7392 DO_SMLAX(SMLALBB, 2, 0, 0)
7393 DO_SMLAX(SMLALBT, 2, 0, 1)
7394 DO_SMLAX(SMLALTB, 2, 1, 0)
7395 DO_SMLAX(SMLALTT, 2, 1, 1)
7397 #undef DO_SMLAX
7399 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
7401 TCGv_i32 t0, t1;
7403 if (!ENABLE_ARCH_5TE) {
7404 return false;
7407 t0 = load_reg(s, a->rn);
7408 t1 = load_reg(s, a->rm);
7409 /*
7410 * Since the nominal result is product<47:16>, shift the 16-bit
7411 * input up by 16 bits, so that the result is at product<63:32>.
7412 */
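/*
 * Concretely: muls2 below leaves the full 64-bit product in t0 (low) and
 * t1 (high); with the 16-bit operand pre-shifted into the top half, t1 is
 * exactly (Rn * Rm<15:0>)<47:16>, so only t1 is kept.
 */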
7413 if (mt) {
7414 tcg_gen_andi_i32(t1, t1, 0xffff0000);
7415 } else {
7416 tcg_gen_shli_i32(t1, t1, 16);
7418 tcg_gen_muls2_i32(t0, t1, t0, t1);
7419 tcg_temp_free_i32(t0);
7420 if (add) {
7421 t0 = load_reg(s, a->ra);
7422 gen_helper_add_setq(t1, cpu_env, t1, t0);
7423 tcg_temp_free_i32(t0);
7425 store_reg(s, a->rd, t1);
7426 return true;
7429 #define DO_SMLAWX(NAME, add, mt) \
7430 static bool trans_##NAME(DisasContext *s, arg_rrrr *a) \
7432 return op_smlawx(s, a, add, mt); \
7435 DO_SMLAWX(SMULWB, 0, 0)
7436 DO_SMLAWX(SMULWT, 0, 1)
7437 DO_SMLAWX(SMLAWB, 1, 0)
7438 DO_SMLAWX(SMLAWT, 1, 1)
7440 #undef DO_SMLAWX
7443 * MSR (immediate) and hints
7446 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
7448 /*
7449 * When running single-threaded TCG code, use the helper to ensure that
7450 * the next round-robin scheduled vCPU gets a crack. When running in
7451 * MTTCG we don't generate jumps to the helper as it won't affect the
7452 * scheduling of other vCPUs.
7453 */
7454 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
7455 gen_set_pc_im(s, s->base.pc_next);
7456 s->base.is_jmp = DISAS_YIELD;
7458 return true;
7461 static bool trans_WFE(DisasContext *s, arg_WFE *a)
7463 /*
7464 * When running single-threaded TCG code, use the helper to ensure that
7465 * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
7466 * just skip this instruction. Currently the SEV/SEVL instructions,
7467 * which are *one* of many ways to wake the CPU from WFE, are not
7468 * implemented so we can't sleep like WFI does.
7469 */
7470 if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
7471 gen_set_pc_im(s, s->base.pc_next);
7472 s->base.is_jmp = DISAS_WFE;
7474 return true;
7477 static bool trans_WFI(DisasContext *s, arg_WFI *a)
7479 /* For WFI, halt the vCPU until an IRQ. */
7480 gen_set_pc_im(s, s->base.pc_next);
7481 s->base.is_jmp = DISAS_WFI;
7482 return true;
7485 static bool trans_NOP(DisasContext *s, arg_NOP *a)
7487 return true;
7490 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
7492 uint32_t val = ror32(a->imm, a->rot * 2);
7493 uint32_t mask = msr_mask(s, a->mask, a->r);
7495 if (gen_set_psr_im(s, mask, a->r, val)) {
7496 unallocated_encoding(s);
7498 return true;
7502 * Cyclic Redundancy Check
7505 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
7507 TCGv_i32 t1, t2, t3;
7509 if (!dc_isar_feature(aa32_crc32, s)) {
7510 return false;
7513 t1 = load_reg(s, a->rn);
7514 t2 = load_reg(s, a->rm);
7515 switch (sz) {
7516 case MO_8:
7517 gen_uxtb(t2);
7518 break;
7519 case MO_16:
7520 gen_uxth(t2);
7521 break;
7522 case MO_32:
7523 break;
7524 default:
7525 g_assert_not_reached();
7527 t3 = tcg_const_i32(1 << sz);
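/* The third helper operand is the operand width in bytes (1, 2 or 4),
 * i.e. how much of t2 the crc32/crc32c helper consumes. */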
7528 if (c) {
7529 gen_helper_crc32c(t1, t1, t2, t3);
7530 } else {
7531 gen_helper_crc32(t1, t1, t2, t3);
7533 tcg_temp_free_i32(t2);
7534 tcg_temp_free_i32(t3);
7535 store_reg(s, a->rd, t1);
7536 return true;
7539 #define DO_CRC32(NAME, c, sz) \
7540 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
7541 { return op_crc32(s, a, c, sz); }
7543 DO_CRC32(CRC32B, false, MO_8)
7544 DO_CRC32(CRC32H, false, MO_16)
7545 DO_CRC32(CRC32W, false, MO_32)
7546 DO_CRC32(CRC32CB, true, MO_8)
7547 DO_CRC32(CRC32CH, true, MO_16)
7548 DO_CRC32(CRC32CW, true, MO_32)
7550 #undef DO_CRC32
7553 * Miscellaneous instructions
7556 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
7558 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7559 return false;
7561 gen_mrs_banked(s, a->r, a->sysm, a->rd);
7562 return true;
7565 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
7567 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7568 return false;
7570 gen_msr_banked(s, a->r, a->sysm, a->rn);
7571 return true;
7574 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
7576 TCGv_i32 tmp;
7578 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7579 return false;
7581 if (a->r) {
7582 if (IS_USER(s)) {
7583 unallocated_encoding(s);
7584 return true;
7586 tmp = load_cpu_field(spsr);
7587 } else {
7588 tmp = tcg_temp_new_i32();
7589 gen_helper_cpsr_read(tmp, cpu_env);
7591 store_reg(s, a->rd, tmp);
7592 return true;
7595 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
7597 TCGv_i32 tmp;
7598 uint32_t mask = msr_mask(s, a->mask, a->r);
7600 if (arm_dc_feature(s, ARM_FEATURE_M)) {
7601 return false;
7603 tmp = load_reg(s, a->rn);
7604 if (gen_set_psr(s, mask, a->r, tmp)) {
7605 unallocated_encoding(s);
7607 return true;
7610 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
7612 TCGv_i32 tmp;
7614 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
7615 return false;
7617 tmp = tcg_const_i32(a->sysm);
7618 gen_helper_v7m_mrs(tmp, cpu_env, tmp);
7619 store_reg(s, a->rd, tmp);
7620 return true;
7623 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
7625 TCGv_i32 addr, reg;
7627 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
7628 return false;
7630 addr = tcg_const_i32((a->mask << 10) | a->sysm);
7631 reg = load_reg(s, a->rn);
7632 gen_helper_v7m_msr(cpu_env, addr, reg);
7633 tcg_temp_free_i32(addr);
7634 tcg_temp_free_i32(reg);
7635 /* If we wrote to CONTROL, the EL might have changed */
7636 gen_helper_rebuild_hflags_m32_newel(cpu_env);
7637 gen_lookup_tb(s);
7638 return true;
7641 static bool trans_BX(DisasContext *s, arg_BX *a)
7643 if (!ENABLE_ARCH_4T) {
7644 return false;
7646 gen_bx_excret(s, load_reg(s, a->rm));
7647 return true;
7650 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
7652 if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
7653 return false;
7655 /* Trivial implementation equivalent to bx. */
7656 gen_bx(s, load_reg(s, a->rm));
7657 return true;
7660 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
7662 TCGv_i32 tmp;
7664 if (!ENABLE_ARCH_5) {
7665 return false;
7667 tmp = load_reg(s, a->rm);
7668 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
7669 gen_bx(s, tmp);
7670 return true;
7673 /*
7674 * BXNS/BLXNS: only exist for v8M with the security extensions,
7675 * and always UNDEF if NonSecure. We don't implement these in
7676 * the user-only mode either (in theory you can use them from
7677 * Secure User mode but they are too tied in to system emulation).
7678 */
7679 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
7681 if (!s->v8m_secure || IS_USER_ONLY) {
7682 unallocated_encoding(s);
7683 } else {
7684 gen_bxns(s, a->rm);
7686 return true;
7689 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
7691 if (!s->v8m_secure || IS_USER_ONLY) {
7692 unallocated_encoding(s);
7693 } else {
7694 gen_blxns(s, a->rm);
7696 return true;
7699 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
7701 TCGv_i32 tmp;
7703 if (!ENABLE_ARCH_5) {
7704 return false;
7706 tmp = load_reg(s, a->rm);
7707 tcg_gen_clzi_i32(tmp, tmp, 32);
7708 store_reg(s, a->rd, tmp);
7709 return true;
7712 static bool trans_ERET(DisasContext *s, arg_ERET *a)
7714 TCGv_i32 tmp;
7716 if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
7717 return false;
7719 if (IS_USER(s)) {
7720 unallocated_encoding(s);
7721 return true;
7723 if (s->current_el == 2) {
7724 /* ERET from Hyp uses ELR_Hyp, not LR */
7725 tmp = load_cpu_field(elr_el[2]);
7726 } else {
7727 tmp = load_reg(s, 14);
7729 gen_exception_return(s, tmp);
7730 return true;
7733 static bool trans_HLT(DisasContext *s, arg_HLT *a)
7735 gen_hlt(s, a->imm);
7736 return true;
7739 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
7741 if (!ENABLE_ARCH_5) {
7742 return false;
7744 if (arm_dc_feature(s, ARM_FEATURE_M) &&
7745 semihosting_enabled() &&
7746 #ifndef CONFIG_USER_ONLY
7747 !IS_USER(s) &&
7748 #endif
7749 (a->imm == 0xab)) {
7750 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
7751 } else {
7752 gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
7754 return true;
7757 static bool trans_HVC(DisasContext *s, arg_HVC *a)
7759 if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
7760 return false;
7762 if (IS_USER(s)) {
7763 unallocated_encoding(s);
7764 } else {
7765 gen_hvc(s, a->imm);
7767 return true;
7770 static bool trans_SMC(DisasContext *s, arg_SMC *a)
7772 if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
7773 return false;
7775 if (IS_USER(s)) {
7776 unallocated_encoding(s);
7777 } else {
7778 gen_smc(s);
7780 return true;
7783 static bool trans_SG(DisasContext *s, arg_SG *a)
7785 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
7786 !arm_dc_feature(s, ARM_FEATURE_V8)) {
7787 return false;
7789 /*
7790 * SG (v8M only)
7791 * The bulk of the behaviour for this instruction is implemented
7792 * in v7m_handle_execute_nsc(), which deals with the insn when
7793 * it is executed by a CPU in non-secure state from memory
7794 * which is Secure & NonSecure-Callable.
7795 * Here we only need to handle the remaining cases:
7796 * * in NS memory (including the "security extension not
7797 * implemented" case) : NOP
7798 * * in S memory but CPU already secure (clear IT bits)
7799 * We know that the attribute for the memory this insn is
7800 * in must match the current CPU state, because otherwise
7801 * get_phys_addr_pmsav8 would have generated an exception.
7802 */
7803 if (s->v8m_secure) {
7804 /* Like the IT insn, we don't need to generate any code */
7805 s->condexec_cond = 0;
7806 s->condexec_mask = 0;
7808 return true;
7811 static bool trans_TT(DisasContext *s, arg_TT *a)
7813 TCGv_i32 addr, tmp;
7815 if (!arm_dc_feature(s, ARM_FEATURE_M) ||
7816 !arm_dc_feature(s, ARM_FEATURE_V8)) {
7817 return false;
7819 if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
7820 /* We UNDEF for these UNPREDICTABLE cases */
7821 unallocated_encoding(s);
7822 return true;
7824 if (a->A && !s->v8m_secure) {
7825 /* This case is UNDEFINED. */
7826 unallocated_encoding(s);
7827 return true;
7830 addr = load_reg(s, a->rn);
7831 tmp = tcg_const_i32((a->A << 1) | a->T);
7832 gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
7833 tcg_temp_free_i32(addr);
7834 store_reg(s, a->rd, tmp);
7835 return true;
7839 * Load/store register index
7842 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
7844 ISSInfo ret;
7846 /* ISS not valid if writeback */
7847 if (p && !w) {
7848 ret = rd;
7849 if (s->base.pc_next - s->pc_curr == 2) {
7850 ret |= ISSIs16Bit;
7852 } else {
7853 ret = ISSInvalid;
7855 return ret;
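/*
 * Example: a 16-bit Thumb encoding is detected as pc_next - pc_curr == 2
 * and reported via ISSIs16Bit so the syndrome records the insn length.
 */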
7858 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
7860 TCGv_i32 addr = load_reg(s, a->rn);
7862 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7863 gen_helper_v8m_stackcheck(cpu_env, addr);
7866 if (a->p) {
7867 TCGv_i32 ofs = load_reg(s, a->rm);
7868 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
7869 if (a->u) {
7870 tcg_gen_add_i32(addr, addr, ofs);
7871 } else {
7872 tcg_gen_sub_i32(addr, addr, ofs);
7874 tcg_temp_free_i32(ofs);
7876 return addr;
7879 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
7880 TCGv_i32 addr, int address_offset)
7882 if (!a->p) {
7883 TCGv_i32 ofs = load_reg(s, a->rm);
7884 gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
7885 if (a->u) {
7886 tcg_gen_add_i32(addr, addr, ofs);
7887 } else {
7888 tcg_gen_sub_i32(addr, addr, ofs);
7890 tcg_temp_free_i32(ofs);
7891 } else if (!a->w) {
7892 tcg_temp_free_i32(addr);
7893 return;
7895 tcg_gen_addi_i32(addr, addr, address_offset);
7896 store_reg(s, a->rn, addr);
7899 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
7900 MemOp mop, int mem_idx)
7902 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
7903 TCGv_i32 addr, tmp;
7905 addr = op_addr_rr_pre(s, a);
7907 tmp = tcg_temp_new_i32();
7908 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
7909 disas_set_da_iss(s, mop, issinfo);
7911 /*
7912 * Perform base writeback before the loaded value to
7913 * ensure correct behavior with overlapping index registers.
7914 */
7915 op_addr_rr_post(s, a, addr, 0);
7916 store_reg_from_load(s, a->rt, tmp);
7917 return true;
7920 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
7921 MemOp mop, int mem_idx)
7923 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
7924 TCGv_i32 addr, tmp;
7926 addr = op_addr_rr_pre(s, a);
7928 tmp = load_reg(s, a->rt);
7929 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
7930 disas_set_da_iss(s, mop, issinfo);
7931 tcg_temp_free_i32(tmp);
7933 op_addr_rr_post(s, a, addr, 0);
7934 return true;
7937 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
7939 int mem_idx = get_mem_index(s);
7940 TCGv_i32 addr, tmp;
7942 if (!ENABLE_ARCH_5TE) {
7943 return false;
7945 if (a->rt & 1) {
7946 unallocated_encoding(s);
7947 return true;
7949 addr = op_addr_rr_pre(s, a);
7951 tmp = tcg_temp_new_i32();
7952 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7953 store_reg(s, a->rt, tmp);
7955 tcg_gen_addi_i32(addr, addr, 4);
7957 tmp = tcg_temp_new_i32();
7958 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7959 store_reg(s, a->rt + 1, tmp);
7961 /* LDRD w/ base writeback is undefined if the registers overlap. */
7962 op_addr_rr_post(s, a, addr, -4);
7963 return true;
7966 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
7968 int mem_idx = get_mem_index(s);
7969 TCGv_i32 addr, tmp;
7971 if (!ENABLE_ARCH_5TE) {
7972 return false;
7974 if (a->rt & 1) {
7975 unallocated_encoding(s);
7976 return true;
7978 addr = op_addr_rr_pre(s, a);
7980 tmp = load_reg(s, a->rt);
7981 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7982 tcg_temp_free_i32(tmp);
7984 tcg_gen_addi_i32(addr, addr, 4);
7986 tmp = load_reg(s, a->rt + 1);
7987 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7988 tcg_temp_free_i32(tmp);
7990 op_addr_rr_post(s, a, addr, -4);
7991 return true;
7995 * Load/store immediate index
7998 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
8000 int ofs = a->imm;
8002 if (!a->u) {
8003 ofs = -ofs;
8006 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
8007 /*
8008 * Stackcheck. Here we know 'addr' is the current SP;
8009 * U is set if we're moving SP up, else down. It is
8010 * UNKNOWN whether the limit check triggers when SP starts
8011 * below the limit and ends up above it; we chose to do so.
8012 */
8013 if (!a->u) {
8014 TCGv_i32 newsp = tcg_temp_new_i32();
8015 tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
8016 gen_helper_v8m_stackcheck(cpu_env, newsp);
8017 tcg_temp_free_i32(newsp);
8018 } else {
8019 gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
8023 return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
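/*
 * Pre-indexed forms fold the offset into the address returned here;
 * post-indexed forms (a->p == 0) use the base unmodified and apply the
 * offset afterwards in op_addr_ri_post().
 */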
8026 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
8027 TCGv_i32 addr, int address_offset)
8029 if (!a->p) {
8030 if (a->u) {
8031 address_offset += a->imm;
8032 } else {
8033 address_offset -= a->imm;
8035 } else if (!a->w) {
8036 tcg_temp_free_i32(addr);
8037 return;
8039 tcg_gen_addi_i32(addr, addr, address_offset);
8040 store_reg(s, a->rn, addr);
8043 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
8044 MemOp mop, int mem_idx)
8046 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8047 TCGv_i32 addr, tmp;
8049 addr = op_addr_ri_pre(s, a);
8051 tmp = tcg_temp_new_i32();
8052 gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8053 disas_set_da_iss(s, mop, issinfo);
8055 /*
8056 * Perform base writeback before the loaded value to
8057 * ensure correct behavior with overlapping index registers.
8058 */
8059 op_addr_ri_post(s, a, addr, 0);
8060 store_reg_from_load(s, a->rt, tmp);
8061 return true;
8064 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8065 MemOp mop, int mem_idx)
8067 ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8068 TCGv_i32 addr, tmp;
8070 addr = op_addr_ri_pre(s, a);
8072 tmp = load_reg(s, a->rt);
8073 gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8074 disas_set_da_iss(s, mop, issinfo);
8075 tcg_temp_free_i32(tmp);
8077 op_addr_ri_post(s, a, addr, 0);
8078 return true;
8081 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8083 int mem_idx = get_mem_index(s);
8084 TCGv_i32 addr, tmp;
8086 addr = op_addr_ri_pre(s, a);
8088 tmp = tcg_temp_new_i32();
8089 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8090 store_reg(s, a->rt, tmp);
8092 tcg_gen_addi_i32(addr, addr, 4);
8094 tmp = tcg_temp_new_i32();
8095 gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8096 store_reg(s, rt2, tmp);
8098 /* LDRD w/ base writeback is undefined if the registers overlap. */
8099 op_addr_ri_post(s, a, addr, -4);
8100 return true;
8103 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8105 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8106 return false;
8108 return op_ldrd_ri(s, a, a->rt + 1);
8111 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8113 arg_ldst_ri b = {
8114 .u = a->u, .w = a->w, .p = a->p,
8115 .rn = a->rn, .rt = a->rt, .imm = a->imm
8117 return op_ldrd_ri(s, &b, a->rt2);
8120 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8122 int mem_idx = get_mem_index(s);
8123 TCGv_i32 addr, tmp;
8125 addr = op_addr_ri_pre(s, a);
8127 tmp = load_reg(s, a->rt);
8128 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8129 tcg_temp_free_i32(tmp);
8131 tcg_gen_addi_i32(addr, addr, 4);
8133 tmp = load_reg(s, rt2);
8134 gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8135 tcg_temp_free_i32(tmp);
8137 op_addr_ri_post(s, a, addr, -4);
8138 return true;
8141 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8143 if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8144 return false;
8146 return op_strd_ri(s, a, a->rt + 1);
8149 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8151 arg_ldst_ri b = {
8152 .u = a->u, .w = a->w, .p = a->p,
8153 .rn = a->rn, .rt = a->rt, .imm = a->imm
8155 return op_strd_ri(s, &b, a->rt2);
8158 #define DO_LDST(NAME, WHICH, MEMOP) \
8159 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a) \
8161 return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
8163 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a) \
8165 return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
8167 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a) \
8169 return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
8171 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a) \
8173 return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
8176 DO_LDST(LDR, load, MO_UL)
8177 DO_LDST(LDRB, load, MO_UB)
8178 DO_LDST(LDRH, load, MO_UW)
8179 DO_LDST(LDRSB, load, MO_SB)
8180 DO_LDST(LDRSH, load, MO_SW)
8182 DO_LDST(STR, store, MO_UL)
8183 DO_LDST(STRB, store, MO_UB)
8184 DO_LDST(STRH, store, MO_UW)
8186 #undef DO_LDST
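/*
 * For example, DO_LDST(LDR, load, MO_UL) generated trans_LDR_ri,
 * trans_LDRT_ri, trans_LDR_rr and trans_LDRT_rr above, with the ...T
 * (unprivileged) variants using get_a32_user_mem_index().
 */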
8189 * Synchronization primitives
8192 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
8194 TCGv_i32 addr, tmp;
8195 TCGv taddr;
8197 opc |= s->be_data;
8198 addr = load_reg(s, a->rn);
8199 taddr = gen_aa32_addr(s, addr, opc);
8200 tcg_temp_free_i32(addr);
8202 tmp = load_reg(s, a->rt2);
8203 tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
8204 tcg_temp_free(taddr);
8206 store_reg(s, a->rt, tmp);
8207 return true;
8210 static bool trans_SWP(DisasContext *s, arg_SWP *a)
8212 return op_swp(s, a, MO_UL | MO_ALIGN);
8215 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
8217 return op_swp(s, a, MO_UB);
8221 * Load/Store Exclusive and Load-Acquire/Store-Release
8224 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
8226 TCGv_i32 addr;
8227 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8228 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8230 /* We UNDEF for these UNPREDICTABLE cases. */
8231 if (a->rd == 15 || a->rn == 15 || a->rt == 15
8232 || a->rd == a->rn || a->rd == a->rt
8233 || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
8234 || (mop == MO_64
8235 && (a->rt2 == 15
8236 || a->rd == a->rt2
8237 || (!v8a && s->thumb && a->rt2 == 13)))) {
8238 unallocated_encoding(s);
8239 return true;
8242 if (rel) {
8243 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8246 addr = tcg_temp_local_new_i32();
8247 load_reg_var(s, addr, a->rn);
8248 tcg_gen_addi_i32(addr, addr, a->imm);
8250 gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
8251 tcg_temp_free_i32(addr);
8252 return true;
8255 static bool trans_STREX(DisasContext *s, arg_STREX *a)
8257 if (!ENABLE_ARCH_6) {
8258 return false;
8260 return op_strex(s, a, MO_32, false);
8263 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
8265 if (!ENABLE_ARCH_6K) {
8266 return false;
8268 /* We UNDEF for these UNPREDICTABLE cases. */
8269 if (a->rt & 1) {
8270 unallocated_encoding(s);
8271 return true;
8273 a->rt2 = a->rt + 1;
8274 return op_strex(s, a, MO_64, false);
8277 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
8279 return op_strex(s, a, MO_64, false);
8282 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
8284 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8285 return false;
8287 return op_strex(s, a, MO_8, false);
8290 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
8292 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8293 return false;
8295 return op_strex(s, a, MO_16, false);
8298 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
8300 if (!ENABLE_ARCH_8) {
8301 return false;
8303 return op_strex(s, a, MO_32, true);
8306 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
8308 if (!ENABLE_ARCH_8) {
8309 return false;
8311 /* We UNDEF for these UNPREDICTABLE cases. */
8312 if (a->rt & 1) {
8313 unallocated_encoding(s);
8314 return true;
8316 a->rt2 = a->rt + 1;
8317 return op_strex(s, a, MO_64, true);
8320 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
8322 if (!ENABLE_ARCH_8) {
8323 return false;
8325 return op_strex(s, a, MO_64, true);
8328 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
8330 if (!ENABLE_ARCH_8) {
8331 return false;
8333 return op_strex(s, a, MO_8, true);
8336 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
8338 if (!ENABLE_ARCH_8) {
8339 return false;
8341 return op_strex(s, a, MO_16, true);
8344 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
8346 TCGv_i32 addr, tmp;
8348 if (!ENABLE_ARCH_8) {
8349 return false;
8351 /* We UNDEF for these UNPREDICTABLE cases. */
8352 if (a->rn == 15 || a->rt == 15) {
8353 unallocated_encoding(s);
8354 return true;
8357 addr = load_reg(s, a->rn);
8358 tmp = load_reg(s, a->rt);
8359 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8360 gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
8361 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
8363 tcg_temp_free_i32(tmp);
8364 tcg_temp_free_i32(addr);
8365 return true;
8368 static bool trans_STL(DisasContext *s, arg_STL *a)
8370 return op_stl(s, a, MO_UL);
8373 static bool trans_STLB(DisasContext *s, arg_STL *a)
8375 return op_stl(s, a, MO_UB);
8378 static bool trans_STLH(DisasContext *s, arg_STL *a)
8380 return op_stl(s, a, MO_UW);
8383 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
8385 TCGv_i32 addr;
8386 /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8387 bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8389 /* We UNDEF for these UNPREDICTABLE cases. */
8390 if (a->rn == 15 || a->rt == 15
8391 || (!v8a && s->thumb && a->rt == 13)
8392 || (mop == MO_64
8393 && (a->rt2 == 15 || a->rt == a->rt2
8394 || (!v8a && s->thumb && a->rt2 == 13)))) {
8395 unallocated_encoding(s);
8396 return true;
8399 addr = tcg_temp_local_new_i32();
8400 load_reg_var(s, addr, a->rn);
8401 tcg_gen_addi_i32(addr, addr, a->imm);
8403 gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
8404 tcg_temp_free_i32(addr);
8406 if (acq) {
8407 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
8409 return true;
8412 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
8414 if (!ENABLE_ARCH_6) {
8415 return false;
8417 return op_ldrex(s, a, MO_32, false);
8420 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
8422 if (!ENABLE_ARCH_6K) {
8423 return false;
8425 /* We UNDEF for these UNPREDICTABLE cases. */
8426 if (a->rt & 1) {
8427 unallocated_encoding(s);
8428 return true;
8430 a->rt2 = a->rt + 1;
8431 return op_ldrex(s, a, MO_64, false);
8434 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
8436 return op_ldrex(s, a, MO_64, false);
8439 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
8441 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8442 return false;
8444 return op_ldrex(s, a, MO_8, false);
8447 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
8449 if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8450 return false;
8452 return op_ldrex(s, a, MO_16, false);
8455 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
8457 if (!ENABLE_ARCH_8) {
8458 return false;
8460 return op_ldrex(s, a, MO_32, true);
8463 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
8465 if (!ENABLE_ARCH_8) {
8466 return false;
8468 /* We UNDEF for these UNPREDICTABLE cases. */
8469 if (a->rt & 1) {
8470 unallocated_encoding(s);
8471 return true;
8473 a->rt2 = a->rt + 1;
8474 return op_ldrex(s, a, MO_64, true);
8477 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
8479 if (!ENABLE_ARCH_8) {
8480 return false;
8482 return op_ldrex(s, a, MO_64, true);
8485 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
8487 if (!ENABLE_ARCH_8) {
8488 return false;
8490 return op_ldrex(s, a, MO_8, true);
8493 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
8495 if (!ENABLE_ARCH_8) {
8496 return false;
8498 return op_ldrex(s, a, MO_16, true);
8501 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
8503 TCGv_i32 addr, tmp;
8505 if (!ENABLE_ARCH_8) {
8506 return false;
8508 /* We UNDEF for these UNPREDICTABLE cases. */
8509 if (a->rn == 15 || a->rt == 15) {
8510 unallocated_encoding(s);
8511 return true;
8514 addr = load_reg(s, a->rn);
8515 tmp = tcg_temp_new_i32();
8516 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
8517 disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
8518 tcg_temp_free_i32(addr);
8520 store_reg(s, a->rt, tmp);
8521 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8522 return true;
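/*
 * LDA, LDAB and LDAH above are load-acquire but not exclusive: a plain
 * load followed by a memory barrier, with no effect on the exclusive
 * monitor (contrast op_ldrex above).
 */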
8525 static bool trans_LDA(DisasContext *s, arg_LDA *a)
8527 return op_lda(s, a, MO_UL);
8530 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
8532 return op_lda(s, a, MO_UB);
8535 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
8537 return op_lda(s, a, MO_UW);
8541 * Media instructions
8544 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
8546 TCGv_i32 t1, t2;
8548 if (!ENABLE_ARCH_6) {
8549 return false;
8552 t1 = load_reg(s, a->rn);
8553 t2 = load_reg(s, a->rm);
8554 gen_helper_usad8(t1, t1, t2);
8555 tcg_temp_free_i32(t2);
8556 if (a->ra != 15) {
8557 t2 = load_reg(s, a->ra);
8558 tcg_gen_add_i32(t1, t1, t2);
8559 tcg_temp_free_i32(t2);
8561 store_reg(s, a->rd, t1);
8562 return true;
8565 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
8567 TCGv_i32 tmp;
8568 int width = a->widthm1 + 1;
8569 int shift = a->lsb;
8571 if (!ENABLE_ARCH_6T2) {
8572 return false;
8574 if (shift + width > 32) {
8575 /* UNPREDICTABLE; we choose to UNDEF */
8576 unallocated_encoding(s);
8577 return true;
8580 tmp = load_reg(s, a->rn);
8581 if (u) {
8582 tcg_gen_extract_i32(tmp, tmp, shift, width);
8583 } else {
8584 tcg_gen_sextract_i32(tmp, tmp, shift, width);
8586 store_reg(s, a->rd, tmp);
8587 return true;
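/*
 * Example: UBFX r0, r1, #8, #4 has lsb == 8 and widthm1 == 3, so op_bfx
 * reduces to tcg_gen_extract_i32(tmp, tmp, 8, 4), i.e.
 * r0 = (r1 >> 8) & 0xf.  SBFX is the same with the sign-extending
 * sextract variant.
 */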
8590 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
8592 return op_bfx(s, a, false);
8595 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
8597 return op_bfx(s, a, true);
8600 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
8602 TCGv_i32 tmp;
8603 int msb = a->msb, lsb = a->lsb;
8604 int width;
8606 if (!ENABLE_ARCH_6T2) {
8607 return false;
8609 if (msb < lsb) {
8610 /* UNPREDICTABLE; we choose to UNDEF */
8611 unallocated_encoding(s);
8612 return true;
8615 width = msb + 1 - lsb;
8616 if (a->rn == 15) {
8617 /* BFC */
8618 tmp = tcg_const_i32(0);
8619 } else {
8620 /* BFI */
8621 tmp = load_reg(s, a->rn);
8623 if (width != 32) {
8624 TCGv_i32 tmp2 = load_reg(s, a->rd);
8625 tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
8626 tcg_temp_free_i32(tmp2);
8628 store_reg(s, a->rd, tmp);
8629 return true;
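/*
 * Example: BFI r0, r1, #4, #8 has lsb == 4 and msb == 11 (width 8), so
 * the deposit above computes r0 = (r0 & ~0xff0) | ((r1 & 0xff) << 4).
 * BFC is the same operation with a zero source, which simply clears the
 * field.
 */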
8632 static bool trans_UDF(DisasContext *s, arg_UDF *a)
8634 unallocated_encoding(s);
8635 return true;
8639 * Parallel addition and subtraction
8642 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
8643 void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
8645 TCGv_i32 t0, t1;
8647 if (s->thumb
8648 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8649 : !ENABLE_ARCH_6) {
8650 return false;
8653 t0 = load_reg(s, a->rn);
8654 t1 = load_reg(s, a->rm);
8656 gen(t0, t0, t1);
8658 tcg_temp_free_i32(t1);
8659 store_reg(s, a->rd, t0);
8660 return true;
8663 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
8664 void (*gen)(TCGv_i32, TCGv_i32,
8665 TCGv_i32, TCGv_ptr))
8667 TCGv_i32 t0, t1;
8668 TCGv_ptr ge;
8670 if (s->thumb
8671 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8672 : !ENABLE_ARCH_6) {
8673 return false;
8676 t0 = load_reg(s, a->rn);
8677 t1 = load_reg(s, a->rm);
8679 ge = tcg_temp_new_ptr();
8680 tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
8681 gen(t0, t0, t1, ge);
8683 tcg_temp_free_ptr(ge);
8684 tcg_temp_free_i32(t1);
8685 store_reg(s, a->rd, t0);
8686 return true;
8689 #define DO_PAR_ADDSUB(NAME, helper) \
8690 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8692 return op_par_addsub(s, a, helper); \
8695 #define DO_PAR_ADDSUB_GE(NAME, helper) \
8696 static bool trans_##NAME(DisasContext *s, arg_rrr *a) \
8698 return op_par_addsub_ge(s, a, helper); \
8701 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
8702 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
8703 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
8704 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
8705 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
8706 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
8708 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
8709 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
8710 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
8711 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
8712 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
8713 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
8715 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
8716 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
8717 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
8718 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
8719 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
8720 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
8722 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
8723 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
8724 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
8725 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
8726 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
8727 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
8729 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
8730 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
8731 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
8732 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
8733 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
8734 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
8736 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
8737 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
8738 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
8739 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
8740 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
8741 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
8743 #undef DO_PAR_ADDSUB
8744 #undef DO_PAR_ADDSUB_GE
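/*
 * Naming of the helpers above: the S or U prefix selects signed or
 * unsigned arithmetic, the Q and UQ forms saturate, the SH and UH forms
 * halve each result, and only the plain signed/unsigned forms update the
 * GE flags (hence DO_PAR_ADDSUB_GE for those).
 */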
8747 * Packing, unpacking, saturation, and reversal
8750 static bool trans_PKH(DisasContext *s, arg_PKH *a)
8752 TCGv_i32 tn, tm;
8753 int shift = a->imm;
8755 if (s->thumb
8756 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8757 : !ENABLE_ARCH_6) {
8758 return false;
8761 tn = load_reg(s, a->rn);
8762 tm = load_reg(s, a->rm);
8763 if (a->tb) {
8764 /* PKHTB */
8765 if (shift == 0) {
8766 shift = 31;
8768 tcg_gen_sari_i32(tm, tm, shift);
8769 tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
8770 } else {
8771 /* PKHBT */
8772 tcg_gen_shli_i32(tm, tm, shift);
8773 tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
8775 tcg_temp_free_i32(tm);
8776 store_reg(s, a->rd, tn);
8777 return true;
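/*
 * Example: PKHBT r0, r1, r2, LSL #8 packs the bottom halfword of r1 with
 * the shifted top halfword of r2:
 *     r0 = (r1 & 0xffff) | ((r2 << 8) & 0xffff0000)
 * PKHTB takes the top halfword of Rn and the bottom halfword of the
 * arithmetically shifted Rm; a shift amount of 0 there means ASR #32,
 * which the code above implements as a shift by 31 (same result).
 */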
8780 static bool op_sat(DisasContext *s, arg_sat *a,
8781 void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
8783 TCGv_i32 tmp, satimm;
8784 int shift = a->imm;
8786 if (!ENABLE_ARCH_6) {
8787 return false;
8790 tmp = load_reg(s, a->rn);
8791 if (a->sh) {
8792 tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
8793 } else {
8794 tcg_gen_shli_i32(tmp, tmp, shift);
8797 satimm = tcg_const_i32(a->satimm);
8798 gen(tmp, cpu_env, tmp, satimm);
8799 tcg_temp_free_i32(satimm);
8801 store_reg(s, a->rd, tmp);
8802 return true;
8805 static bool trans_SSAT(DisasContext *s, arg_sat *a)
8807 return op_sat(s, a, gen_helper_ssat);
8810 static bool trans_USAT(DisasContext *s, arg_sat *a)
8812 return op_sat(s, a, gen_helper_usat);
8815 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
8817 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8818 return false;
8820 return op_sat(s, a, gen_helper_ssat16);
8823 static bool trans_USAT16(DisasContext *s, arg_sat *a)
8825 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8826 return false;
8828 return op_sat(s, a, gen_helper_usat16);
8831 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
8832 void (*gen_extract)(TCGv_i32, TCGv_i32),
8833 void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
8835 TCGv_i32 tmp;
8837 if (!ENABLE_ARCH_6) {
8838 return false;
8841 tmp = load_reg(s, a->rm);
8843 * TODO: In many cases we could do a shift instead of a rotate.
8844 * Combined with a simple extend, that becomes an extract.
8846 tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
8847 gen_extract(tmp, tmp);
8849 if (a->rn != 15) {
8850 TCGv_i32 tmp2 = load_reg(s, a->rn);
8851 gen_add(tmp, tmp, tmp2);
8852 tcg_temp_free_i32(tmp2);
8854 store_reg(s, a->rd, tmp);
8855 return true;
8858 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
8860 return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
8863 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
8865 return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
8868 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
8870 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8871 return false;
8873 return op_xta(s, a, gen_helper_sxtb16, gen_add16);
8876 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
8878 return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
8881 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
8883 return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
8886 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
8888 if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8889 return false;
8891 return op_xta(s, a, gen_helper_uxtb16, gen_add16);
8894 static bool trans_SEL(DisasContext *s, arg_rrr *a)
8896 TCGv_i32 t1, t2, t3;
8898 if (s->thumb
8899 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8900 : !ENABLE_ARCH_6) {
8901 return false;
8904 t1 = load_reg(s, a->rn);
8905 t2 = load_reg(s, a->rm);
8906 t3 = tcg_temp_new_i32();
8907 tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
8908 gen_helper_sel_flags(t1, t3, t1, t2);
8909 tcg_temp_free_i32(t3);
8910 tcg_temp_free_i32(t2);
8911 store_reg(s, a->rd, t1);
8912 return true;
8915 static bool op_rr(DisasContext *s, arg_rr *a,
8916 void (*gen)(TCGv_i32, TCGv_i32))
8918 TCGv_i32 tmp;
8920 tmp = load_reg(s, a->rm);
8921 gen(tmp, tmp);
8922 store_reg(s, a->rd, tmp);
8923 return true;
8926 static bool trans_REV(DisasContext *s, arg_rr *a)
8928 if (!ENABLE_ARCH_6) {
8929 return false;
8931 return op_rr(s, a, tcg_gen_bswap32_i32);
8934 static bool trans_REV16(DisasContext *s, arg_rr *a)
8936 if (!ENABLE_ARCH_6) {
8937 return false;
8939 return op_rr(s, a, gen_rev16);
8942 static bool trans_REVSH(DisasContext *s, arg_rr *a)
8944 if (!ENABLE_ARCH_6) {
8945 return false;
8947 return op_rr(s, a, gen_revsh);
8950 static bool trans_RBIT(DisasContext *s, arg_rr *a)
8952 if (!ENABLE_ARCH_6T2) {
8953 return false;
8955 return op_rr(s, a, gen_helper_rbit);
8959 * Signed multiply, signed and unsigned divide
8962 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
8964 TCGv_i32 t1, t2;
8966 if (!ENABLE_ARCH_6) {
8967 return false;
8970 t1 = load_reg(s, a->rn);
8971 t2 = load_reg(s, a->rm);
8972 if (m_swap) {
8973 gen_swap_half(t2);
8975 gen_smul_dual(t1, t2);
8977 if (sub) {
8978 /* This subtraction cannot overflow. */
8979 tcg_gen_sub_i32(t1, t1, t2);
8980 } else {
8982 * This addition cannot overflow 32 bits; however it may
8983 * overflow when considered as a signed operation, in which case
8984 * we must set the Q flag.
8986 gen_helper_add_setq(t1, cpu_env, t1, t2);
8988 tcg_temp_free_i32(t2);
8990 if (a->ra != 15) {
8991 t2 = load_reg(s, a->ra);
8992 gen_helper_add_setq(t1, cpu_env, t1, t2);
8993 tcg_temp_free_i32(t2);
8995 store_reg(s, a->rd, t1);
8996 return true;
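/*
 * SMLAD and friends above compute two 16x16->32 products (gen_smul_dual)
 * and then add or subtract them; the X variants swap the halfwords of Rm
 * first.  The difference of the two products cannot overflow, but their
 * sum and the optional accumulate from Ra can overflow as signed values,
 * so those steps go through gen_helper_add_setq to latch the Q flag.
 */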
8999 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
9001 return op_smlad(s, a, false, false);
9004 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
9006 return op_smlad(s, a, true, false);
9009 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
9011 return op_smlad(s, a, false, true);
9014 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
9016 return op_smlad(s, a, true, true);
9019 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
9021 TCGv_i32 t1, t2;
9022 TCGv_i64 l1, l2;
9024 if (!ENABLE_ARCH_6) {
9025 return false;
9028 t1 = load_reg(s, a->rn);
9029 t2 = load_reg(s, a->rm);
9030 if (m_swap) {
9031 gen_swap_half(t2);
9033 gen_smul_dual(t1, t2);
9035 l1 = tcg_temp_new_i64();
9036 l2 = tcg_temp_new_i64();
9037 tcg_gen_ext_i32_i64(l1, t1);
9038 tcg_gen_ext_i32_i64(l2, t2);
9039 tcg_temp_free_i32(t1);
9040 tcg_temp_free_i32(t2);
9042 if (sub) {
9043 tcg_gen_sub_i64(l1, l1, l2);
9044 } else {
9045 tcg_gen_add_i64(l1, l1, l2);
9047 tcg_temp_free_i64(l2);
9049 gen_addq(s, l1, a->ra, a->rd);
9050 gen_storeq_reg(s, a->ra, a->rd, l1);
9051 tcg_temp_free_i64(l1);
9052 return true;
9055 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9057 return op_smlald(s, a, false, false);
9060 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9062 return op_smlald(s, a, true, false);
9065 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9067 return op_smlald(s, a, false, true);
9070 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9072 return op_smlald(s, a, true, true);
9075 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9077 TCGv_i32 t1, t2;
9079 if (s->thumb
9080 ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9081 : !ENABLE_ARCH_6) {
9082 return false;
9085 t1 = load_reg(s, a->rn);
9086 t2 = load_reg(s, a->rm);
9087 tcg_gen_muls2_i32(t2, t1, t1, t2);
9089 if (a->ra != 15) {
9090 TCGv_i32 t3 = load_reg(s, a->ra);
9091 if (sub) {
9093 * For SMMLS, we need a 64-bit subtract: it propagates the
9094 * borrow caused by a non-zero multiplicand lowpart and yields
9095 * the correct result lowpart for rounding.
9097 TCGv_i32 zero = tcg_const_i32(0);
9098 tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9099 tcg_temp_free_i32(zero);
9100 } else {
9101 tcg_gen_add_i32(t1, t1, t3);
9103 tcg_temp_free_i32(t3);
9105 if (round) {
9107 * Adding 0x80000000 to the 64-bit quantity means that we have
9108 * carry in to the high word when the low word has the msb set.
9110 tcg_gen_shri_i32(t2, t2, 31);
9111 tcg_gen_add_i32(t1, t1, t2);
9113 tcg_temp_free_i32(t2);
9114 store_reg(s, a->rd, t1);
9115 return true;
9118 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9120 return op_smmla(s, a, false, false);
9123 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9125 return op_smmla(s, a, true, false);
9128 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9130 return op_smmla(s, a, false, true);
9133 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9135 return op_smmla(s, a, true, true);
9138 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9140 TCGv_i32 t1, t2;
9142 if (s->thumb
9143 ? !dc_isar_feature(aa32_thumb_div, s)
9144 : !dc_isar_feature(aa32_arm_div, s)) {
9145 return false;
9148 t1 = load_reg(s, a->rn);
9149 t2 = load_reg(s, a->rm);
9150 if (u) {
9151 gen_helper_udiv(t1, t1, t2);
9152 } else {
9153 gen_helper_sdiv(t1, t1, t2);
9155 tcg_temp_free_i32(t2);
9156 store_reg(s, a->rd, t1);
9157 return true;
9160 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
9162 return op_div(s, a, false);
9165 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
9167 return op_div(s, a, true);
9171 * Block data transfer
9174 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
9176 TCGv_i32 addr = load_reg(s, a->rn);
9178 if (a->b) {
9179 if (a->i) {
9180 /* pre increment */
9181 tcg_gen_addi_i32(addr, addr, 4);
9182 } else {
9183 /* pre decrement */
9184 tcg_gen_addi_i32(addr, addr, -(n * 4));
9186 } else if (!a->i && n != 1) {
9187 /* post decrement */
9188 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9191 if (s->v8m_stackcheck && a->rn == 13 && a->w) {
9193 * If the writeback is incrementing SP rather than
9194 * decrementing it, and the initial SP is below the
9195 * stack limit but the final written-back SP would
9196 * be above, then we must not perform any memory
9197 * accesses, but it is IMPDEF whether we generate
9198 * an exception. We choose to do so in this case.
9199 * At this point 'addr' is the lowest address, so
9200 * either the original SP (if incrementing) or our
9201 * final SP (if decrementing), so that's what we check.
9203 gen_helper_v8m_stackcheck(cpu_env, addr);
9206 return addr;
9209 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
9210 TCGv_i32 addr, int n)
9212 if (a->w) {
9213 /* write back */
9214 if (!a->b) {
9215 if (a->i) {
9216 /* post increment */
9217 tcg_gen_addi_i32(addr, addr, 4);
9218 } else {
9219 /* post decrement */
9220 tcg_gen_addi_i32(addr, addr, -(n * 4));
9222 } else if (!a->i && n != 1) {
9223 /* pre decrement */
9224 tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9226 store_reg(s, a->rn, addr);
9227 } else {
9228 tcg_temp_free_i32(addr);
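/*
 * Between them, op_addr_block_pre() and op_addr_block_post() implement
 * the four block addressing modes; with n registers transferred:
 *   IA (i=1, b=0): first address Rn,           writeback Rn + 4*n
 *   IB (i=1, b=1): first address Rn + 4,       writeback Rn + 4*n
 *   DA (i=0, b=0): first address Rn - 4*(n-1), writeback Rn - 4*n
 *   DB (i=0, b=1): first address Rn - 4*n,     writeback Rn - 4*n
 * The transfer loops in op_stm() and do_ldm() below always walk upwards
 * in steps of 4 from the first address.
 */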
9232 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
9234 int i, j, n, list, mem_idx;
9235 bool user = a->u;
9236 TCGv_i32 addr, tmp, tmp2;
9238 if (user) {
9239 /* STM (user) */
9240 if (IS_USER(s)) {
9241 /* Only usable in supervisor mode. */
9242 unallocated_encoding(s);
9243 return true;
9247 list = a->list;
9248 n = ctpop16(list);
9249 if (n < min_n || a->rn == 15) {
9250 unallocated_encoding(s);
9251 return true;
9254 addr = op_addr_block_pre(s, a, n);
9255 mem_idx = get_mem_index(s);
9257 for (i = j = 0; i < 16; i++) {
9258 if (!(list & (1 << i))) {
9259 continue;
9262 if (user && i != 15) {
9263 tmp = tcg_temp_new_i32();
9264 tmp2 = tcg_const_i32(i);
9265 gen_helper_get_user_reg(tmp, cpu_env, tmp2);
9266 tcg_temp_free_i32(tmp2);
9267 } else {
9268 tmp = load_reg(s, i);
9270 gen_aa32_st32(s, tmp, addr, mem_idx);
9271 tcg_temp_free_i32(tmp);
9273 /* No need to add after the last transfer. */
9274 if (++j != n) {
9275 tcg_gen_addi_i32(addr, addr, 4);
9279 op_addr_block_post(s, a, addr, n);
9280 return true;
9283 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
9285 /* BitCount(list) < 1 is UNPREDICTABLE */
9286 return op_stm(s, a, 1);
9289 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
9291 /* Writeback register in register list is UNPREDICTABLE for T32. */
9292 if (a->w && (a->list & (1 << a->rn))) {
9293 unallocated_encoding(s);
9294 return true;
9296 /* BitCount(list) < 2 is UNPREDICTABLE */
9297 return op_stm(s, a, 2);
9300 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
9302 int i, j, n, list, mem_idx;
9303 bool loaded_base;
9304 bool user = a->u;
9305 bool exc_return = false;
9306 TCGv_i32 addr, tmp, tmp2, loaded_var;
9308 if (user) {
9309 /* LDM (user), LDM (exception return) */
9310 if (IS_USER(s)) {
9311 /* Only usable in supervisor mode. */
9312 unallocated_encoding(s);
9313 return true;
9315 if (extract32(a->list, 15, 1)) {
9316 exc_return = true;
9317 user = false;
9318 } else {
9319 /* LDM (user) does not allow writeback. */
9320 if (a->w) {
9321 unallocated_encoding(s);
9322 return true;
9327 list = a->list;
9328 n = ctpop16(list);
9329 if (n < min_n || a->rn == 15) {
9330 unallocated_encoding(s);
9331 return true;
9334 addr = op_addr_block_pre(s, a, n);
9335 mem_idx = get_mem_index(s);
9336 loaded_base = false;
9337 loaded_var = NULL;
9339 for (i = j = 0; i < 16; i++) {
9340 if (!(list & (1 << i))) {
9341 continue;
9344 tmp = tcg_temp_new_i32();
9345 gen_aa32_ld32u(s, tmp, addr, mem_idx);
9346 if (user) {
9347 tmp2 = tcg_const_i32(i);
9348 gen_helper_set_user_reg(cpu_env, tmp2, tmp);
9349 tcg_temp_free_i32(tmp2);
9350 tcg_temp_free_i32(tmp);
9351 } else if (i == a->rn) {
9352 loaded_var = tmp;
9353 loaded_base = true;
9354 } else if (i == 15 && exc_return) {
9355 store_pc_exc_ret(s, tmp);
9356 } else {
9357 store_reg_from_load(s, i, tmp);
9360 /* No need to add after the last transfer. */
9361 if (++j != n) {
9362 tcg_gen_addi_i32(addr, addr, 4);
9366 op_addr_block_post(s, a, addr, n);
9368 if (loaded_base) {
9369 /* Note that we reject base == pc above. */
9370 store_reg(s, a->rn, loaded_var);
9373 if (exc_return) {
9374 /* Restore CPSR from SPSR. */
9375 tmp = load_cpu_field(spsr);
9376 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9377 gen_io_start();
9379 gen_helper_cpsr_write_eret(cpu_env, tmp);
9380 if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9381 gen_io_end();
9383 tcg_temp_free_i32(tmp);
9384 /* Must exit loop to check un-masked IRQs */
9385 s->base.is_jmp = DISAS_EXIT;
9387 return true;
9390 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
9393 * Writeback register in register list is UNPREDICTABLE
9394 * for ArchVersion() >= 7. Prior to v7, A32 would write
9395 * an UNKNOWN value to the base register.
9397 if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
9398 unallocated_encoding(s);
9399 return true;
9401 /* BitCount(list) < 1 is UNPREDICTABLE */
9402 return do_ldm(s, a, 1);
9405 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
9407 /* Writeback register in register list is UNPREDICTABLE for T32. */
9408 if (a->w && (a->list & (1 << a->rn))) {
9409 unallocated_encoding(s);
9410 return true;
9412 /* BitCount(list) < 2 is UNPREDICTABLE */
9413 return do_ldm(s, a, 2);
9416 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
9418 /* Writeback is conditional on the base register not being loaded. */
9419 a->w = !(a->list & (1 << a->rn));
9420 /* BitCount(list) < 1 is UNPREDICTABLE */
9421 return do_ldm(s, a, 1);
9425 * Branch, branch with link
9428 static bool trans_B(DisasContext *s, arg_i *a)
9430 gen_jmp(s, read_pc(s) + a->imm);
9431 return true;
9434 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
9436 /* This has cond from encoding, required to be outside IT block. */
9437 if (a->cond >= 0xe) {
9438 return false;
9440 if (s->condexec_mask) {
9441 unallocated_encoding(s);
9442 return true;
9444 arm_skip_unless(s, a->cond);
9445 gen_jmp(s, read_pc(s) + a->imm);
9446 return true;
9449 static bool trans_BL(DisasContext *s, arg_i *a)
9451 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
9452 gen_jmp(s, read_pc(s) + a->imm);
9453 return true;
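/*
 * Note that for BL (and BLX below) the link register is written as
 * s->base.pc_next with the current Thumb state in bit 0, so a later
 * BX lr resumes in the correct instruction set.
 */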
9456 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
9458 TCGv_i32 tmp;
9460 /* For A32, ARCH(5) is checked near the start of the uncond block. */
9461 if (s->thumb && (a->imm & 2)) {
9462 return false;
9464 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
9465 tmp = tcg_const_i32(!s->thumb);
9466 store_cpu_field(tmp, thumb);
9467 gen_jmp(s, (read_pc(s) & ~3) + a->imm);
9468 return true;
9471 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
9473 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
9474 tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
9475 return true;
9478 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
9480 TCGv_i32 tmp = tcg_temp_new_i32();
9482 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
9483 tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
9484 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
9485 gen_bx(s, tmp);
9486 return true;
9489 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
9491 TCGv_i32 tmp;
9493 assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
9494 if (!ENABLE_ARCH_5) {
9495 return false;
9497 tmp = tcg_temp_new_i32();
9498 tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
9499 tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
9500 tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
9501 gen_bx(s, tmp);
9502 return true;
9505 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
9507 TCGv_i32 addr, tmp;
9509 tmp = load_reg(s, a->rm);
9510 if (half) {
9511 tcg_gen_add_i32(tmp, tmp, tmp);
9513 addr = load_reg(s, a->rn);
9514 tcg_gen_add_i32(addr, addr, tmp);
9516 gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
9517 half ? MO_UW | s->be_data : MO_UB);
9518 tcg_temp_free_i32(addr);
9520 tcg_gen_add_i32(tmp, tmp, tmp);
9521 tcg_gen_addi_i32(tmp, tmp, read_pc(s));
9522 store_reg(s, 15, tmp);
9523 return true;
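/*
 * For TBB/TBH above, the loaded table entry is an unsigned offset in
 * halfwords: the branch target is read_pc() + 2 * entry, where the entry
 * is the byte at [Rn + Rm] (TBB) or the halfword at [Rn + 2*Rm] (TBH).
 */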
9526 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
9528 return op_tbranch(s, a, false);
9531 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
9533 return op_tbranch(s, a, true);
9536 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
9538 TCGv_i32 tmp = load_reg(s, a->rn);
9540 arm_gen_condlabel(s);
9541 tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
9542 tmp, 0, s->condlabel);
9543 tcg_temp_free_i32(tmp);
9544 gen_jmp(s, read_pc(s) + a->imm);
9545 return true;
9549 * Supervisor call - both T32 & A32 come here so we need to check
9550 * which mode we are in when checking for semihosting.
9553 static bool trans_SVC(DisasContext *s, arg_SVC *a)
9555 const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
9557 if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
9558 #ifndef CONFIG_USER_ONLY
9559 !IS_USER(s) &&
9560 #endif
9561 (a->imm == semihost_imm)) {
9562 gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
9563 } else {
9564 gen_set_pc_im(s, s->base.pc_next);
9565 s->svc_imm = a->imm;
9566 s->base.is_jmp = DISAS_SWI;
9568 return true;
9572 * Unconditional system instructions
9575 static bool trans_RFE(DisasContext *s, arg_RFE *a)
9577 static const int8_t pre_offset[4] = {
9578 /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
9580 static const int8_t post_offset[4] = {
9581 /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
9583 TCGv_i32 addr, t1, t2;
9585 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
9586 return false;
9588 if (IS_USER(s)) {
9589 unallocated_encoding(s);
9590 return true;
9593 addr = load_reg(s, a->rn);
9594 tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
9596 /* Load PC into t1 and CPSR into t2. */
9597 t1 = tcg_temp_new_i32();
9598 gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
9599 tcg_gen_addi_i32(addr, addr, 4);
9600 t2 = tcg_temp_new_i32();
9601 gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
9603 if (a->w) {
9604 /* Base writeback. */
9605 tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
9606 store_reg(s, a->rn, addr);
9607 } else {
9608 tcg_temp_free_i32(addr);
9610 gen_rfe(s, t1, t2);
9611 return true;
9614 static bool trans_SRS(DisasContext *s, arg_SRS *a)
9616 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
9617 return false;
9619 gen_srs(s, a->mode, a->pu, a->w);
9620 return true;
9623 static bool trans_CPS(DisasContext *s, arg_CPS *a)
9625 uint32_t mask, val;
9627 if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
9628 return false;
9630 if (IS_USER(s)) {
9631 /* Implemented as NOP in user mode. */
9632 return true;
9634 /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
9636 mask = val = 0;
9637 if (a->imod & 2) {
9638 if (a->A) {
9639 mask |= CPSR_A;
9641 if (a->I) {
9642 mask |= CPSR_I;
9644 if (a->F) {
9645 mask |= CPSR_F;
9647 if (a->imod & 1) {
9648 val |= mask;
9651 if (a->M) {
9652 mask |= CPSR_M;
9653 val |= a->mode;
9655 if (mask) {
9656 gen_set_psr_im(s, mask, 0, val);
9658 return true;
9661 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
9663 TCGv_i32 tmp, addr, el;
9665 if (!arm_dc_feature(s, ARM_FEATURE_M)) {
9666 return false;
9668 if (IS_USER(s)) {
9669 /* Implemented as NOP in user mode. */
9670 return true;
9673 tmp = tcg_const_i32(a->im);
9674 /* FAULTMASK */
9675 if (a->F) {
9676 addr = tcg_const_i32(19);
9677 gen_helper_v7m_msr(cpu_env, addr, tmp);
9678 tcg_temp_free_i32(addr);
9680 /* PRIMASK */
9681 if (a->I) {
9682 addr = tcg_const_i32(16);
9683 gen_helper_v7m_msr(cpu_env, addr, tmp);
9684 tcg_temp_free_i32(addr);
9686 el = tcg_const_i32(s->current_el);
9687 gen_helper_rebuild_hflags_m32(cpu_env, el);
9688 tcg_temp_free_i32(el);
9689 tcg_temp_free_i32(tmp);
9690 gen_lookup_tb(s);
9691 return true;
9695 * Clear-Exclusive, Barriers
9698 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
9700 if (s->thumb
9701 ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
9702 : !ENABLE_ARCH_6K) {
9703 return false;
9705 gen_clrex(s);
9706 return true;
9709 static bool trans_DSB(DisasContext *s, arg_DSB *a)
9711 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
9712 return false;
9714 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
9715 return true;
9718 static bool trans_DMB(DisasContext *s, arg_DMB *a)
9720 return trans_DSB(s, NULL);
9723 static bool trans_ISB(DisasContext *s, arg_ISB *a)
9725 if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
9726 return false;
9729 * We need to break the TB after this insn to execute
9730 * self-modifying code correctly and also to take
9731 * any pending interrupts immediately.
9733 gen_goto_tb(s, 0, s->base.pc_next);
9734 return true;
9737 static bool trans_SB(DisasContext *s, arg_SB *a)
9739 if (!dc_isar_feature(aa32_sb, s)) {
9740 return false;
9743 * TODO: There is no speculation barrier opcode
9744 * for TCG; MB and end the TB instead.
9746 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
9747 gen_goto_tb(s, 0, s->base.pc_next);
9748 return true;
9751 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
9753 if (!ENABLE_ARCH_6) {
9754 return false;
9756 if (a->E != (s->be_data == MO_BE)) {
9757 gen_helper_setend(cpu_env);
9758 s->base.is_jmp = DISAS_UPDATE;
9760 return true;
9764 * Preload instructions
9765 * All are nops, contingent on the appropriate arch level.
9768 static bool trans_PLD(DisasContext *s, arg_PLD *a)
9770 return ENABLE_ARCH_5TE;
9773 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
9775 return arm_dc_feature(s, ARM_FEATURE_V7MP);
9778 static bool trans_PLI(DisasContext *s, arg_PLD *a)
9780 return ENABLE_ARCH_7;
9784 * If-then
9787 static bool trans_IT(DisasContext *s, arg_IT *a)
9789 int cond_mask = a->cond_mask;
9792 * No actual code generated for this insn, just setup state.
9794 * Combinations of firstcond and mask which set up a 0b1111
9795 * condition are UNPREDICTABLE; we take the CONSTRAINED
9796 * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
9797 * i.e. both meaning "execute always".
9799 s->condexec_cond = (cond_mask >> 4) & 0xe;
9800 s->condexec_mask = cond_mask & 0x1f;
9801 return true;
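/*
 * Note how the IT state is split above: condexec_cond keeps the top
 * three bits of firstcond, while condexec_mask carries firstcond's low
 * bit followed by the 4-bit mask.  thumb_tr_translate_insn() shifts one
 * bit out of condexec_mask into the low bit of condexec_cond for each
 * insn, so every insn in the block is tested against the 3-bit base
 * condition plus its own then/else bit.
 */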
9805 * Legacy decoder.
9808 static void disas_arm_insn(DisasContext *s, unsigned int insn)
9810 unsigned int cond = insn >> 28;
9812 /* M variants do not implement ARM mode; this must raise the INVSTATE
9813 * UsageFault exception.
9815 if (arm_dc_feature(s, ARM_FEATURE_M)) {
9816 gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
9817 default_exception_el(s));
9818 return;
9821 if (cond == 0xf) {
9822 /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
9823 * choose to UNDEF. In ARMv5 and above the space is used
9824 * for miscellaneous unconditional instructions.
9826 ARCH(5);
9828 /* Unconditional instructions. */
9829 /* TODO: Perhaps merge these into one decodetree output file. */
9830 if (disas_a32_uncond(s, insn) ||
9831 disas_vfp_uncond(s, insn) ||
9832 disas_neon_dp(s, insn) ||
9833 disas_neon_ls(s, insn) ||
9834 disas_neon_shared(s, insn)) {
9835 return;
9837 /* fall back to legacy decoder */
9839 if (((insn >> 25) & 7) == 1) {
9840 /* NEON Data processing. */
9841 if (disas_neon_data_insn(s, insn)) {
9842 goto illegal_op;
9844 return;
9846 if ((insn & 0x0e000f00) == 0x0c000100) {
9847 if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
9848 /* iWMMXt register transfer. */
9849 if (extract32(s->c15_cpar, 1, 1)) {
9850 if (!disas_iwmmxt_insn(s, insn)) {
9851 return;
9856 goto illegal_op;
9858 if (cond != 0xe) {
9859 /* if not always executed, generate a conditional jump to
9860 the next instruction */
9861 arm_skip_unless(s, cond);
9864 /* TODO: Perhaps merge these into one decodetree output file. */
9865 if (disas_a32(s, insn) ||
9866 disas_vfp(s, insn)) {
9867 return;
9869 /* fall back to legacy decoder */
9871 switch ((insn >> 24) & 0xf) {
9872 case 0xc:
9873 case 0xd:
9874 case 0xe:
9875 if (((insn >> 8) & 0xe) == 10) {
9876 /* VFP, but failed disas_vfp. */
9877 goto illegal_op;
9879 if (disas_coproc_insn(s, insn)) {
9880 /* Coprocessor. */
9881 goto illegal_op;
9883 break;
9884 default:
9885 illegal_op:
9886 unallocated_encoding(s);
9887 break;
9891 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
9894 * Return true if this is a 16 bit instruction. We must be precise
9895 * about this (matching the decode).
9897 if ((insn >> 11) < 0x1d) {
9898 /* Definitely a 16-bit instruction */
9899 return true;
9902 /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9903 * first half of a 32-bit Thumb insn. Thumb-1 cores might
9904 * end up actually treating this as two 16-bit insns, though,
9905 * if it's half of a bl/blx pair that might span a page boundary.
9907 if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
9908 arm_dc_feature(s, ARM_FEATURE_M)) {
9909 /* Thumb2 cores (including all M profile ones) always treat
9910 * 32-bit insns as 32-bit.
9912 return false;
9915 if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
9916 /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
9917 * is not on the next page; we merge this into a 32-bit
9918 * insn.
9920 return false;
9922 /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9923 * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9924 * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9925 * -- handle as single 16 bit insn
9927 return true;
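/*
 * Example: 0xb580 (push {r7, lr}) has (insn >> 11) == 0x16 < 0x1d, so it
 * is a 16-bit insn; 0xf7ff (a BL/BLX prefix) has (insn >> 11) == 0x1e,
 * so it is handled by the 32-bit/prefix logic above.
 */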
9930 /* Translate a 32-bit thumb instruction. */
9931 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9934 * ARMv6-M supports a limited subset of Thumb2 instructions.
9935 * Other Thumb1 architectures allow only 32-bit
9936 * combined BL/BLX prefix and suffix.
9938 if (arm_dc_feature(s, ARM_FEATURE_M) &&
9939 !arm_dc_feature(s, ARM_FEATURE_V7)) {
9940 int i;
9941 bool found = false;
9942 static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9943 0xf3b08040 /* dsb */,
9944 0xf3b08050 /* dmb */,
9945 0xf3b08060 /* isb */,
9946 0xf3e08000 /* mrs */,
9947 0xf000d000 /* bl */};
9948 static const uint32_t armv6m_mask[] = {0xffe0d000,
9949 0xfff0d0f0,
9950 0xfff0d0f0,
9951 0xfff0d0f0,
9952 0xffe0d000,
9953 0xf800d000};
9955 for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9956 if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9957 found = true;
9958 break;
9961 if (!found) {
9962 goto illegal_op;
9964 } else if ((insn & 0xf800e800) != 0xf000e800) {
9965 ARCH(6T2);
9968 if ((insn & 0xef000000) == 0xef000000) {
9970 * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9971 * transform into
9972 * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9974 uint32_t a32_insn = (insn & 0xe2ffffff) |
9975 ((insn & (1 << 28)) >> 4) | (1 << 28);
9977 if (disas_neon_dp(s, a32_insn)) {
9978 return;
9982 if ((insn & 0xff100000) == 0xf9000000) {
9984 * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9985 * transform into
9986 * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9988 uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9990 if (disas_neon_ls(s, a32_insn)) {
9991 return;
9996 * TODO: Perhaps merge these into one decodetree output file.
9997 * Note disas_vfp is written for a32 with cond field in the
9998 * top nibble. The t32 encoding requires 0xe in the top nibble.
10000 if (disas_t32(s, insn) ||
10001 disas_vfp_uncond(s, insn) ||
10002 disas_neon_shared(s, insn) ||
10003 ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
10004 return;
10006 /* fall back to legacy decoder */
10008 switch ((insn >> 25) & 0xf) {
10009 case 0: case 1: case 2: case 3:
10010 /* 16-bit instructions. Should never happen. */
10011 abort();
10012 case 6: case 7: case 14: case 15:
10013 /* Coprocessor. */
10014 if (arm_dc_feature(s, ARM_FEATURE_M)) {
10015 /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
10016 if (extract32(insn, 24, 2) == 3) {
10017 goto illegal_op; /* op0 = 0b11 : unallocated */
10020 if (((insn >> 8) & 0xe) == 10 &&
10021 dc_isar_feature(aa32_fpsp_v2, s)) {
10022 /* FP, and the CPU supports it */
10023 goto illegal_op;
10024 } else {
10025 /* All other insns: NOCP */
10026 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
10027 syn_uncategorized(),
10028 default_exception_el(s));
10030 break;
10032 if (((insn >> 24) & 3) == 3) {
10033 /* Translate into the equivalent ARM encoding. */
10034 insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
10035 if (disas_neon_data_insn(s, insn)) {
10036 goto illegal_op;
10038 } else if (((insn >> 8) & 0xe) == 10) {
10039 /* VFP, but failed disas_vfp. */
10040 goto illegal_op;
10041 } else {
10042 if (insn & (1 << 28))
10043 goto illegal_op;
10044 if (disas_coproc_insn(s, insn)) {
10045 goto illegal_op;
10048 break;
10049 case 12:
10050 goto illegal_op;
10051 default:
10052 illegal_op:
10053 unallocated_encoding(s);
10057 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10059 if (!disas_t16(s, insn)) {
10060 unallocated_encoding(s);
10064 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10066 /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10067 * (False positives are OK, false negatives are not.)
10068 * We know this is a Thumb insn, and our caller ensures we are
10069 * only called if dc->base.pc_next is less than 4 bytes from the page
10070 * boundary, so we cross the page if the first 16 bits indicate
10071 * that this is a 32 bit insn.
10073 uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10075 return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10078 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
10080 DisasContext *dc = container_of(dcbase, DisasContext, base);
10081 CPUARMState *env = cs->env_ptr;
10082 ARMCPU *cpu = env_archcpu(env);
10083 uint32_t tb_flags = dc->base.tb->flags;
10084 uint32_t condexec, core_mmu_idx;
10086 dc->isar = &cpu->isar;
10087 dc->condjmp = 0;
10089 dc->aarch64 = 0;
10090 /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10091 * there is no secure EL1, so we route exceptions to EL3.
10093 dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10094 !arm_el_is_aa64(env, 3);
10095 dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
10096 dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10097 condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
10098 dc->condexec_mask = (condexec & 0xf) << 1;
10099 dc->condexec_cond = condexec >> 4;
10101 core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10102 dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10103 dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10104 #if !defined(CONFIG_USER_ONLY)
10105 dc->user = (dc->current_el == 0);
10106 #endif
10107 dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10109 if (arm_feature(env, ARM_FEATURE_M)) {
10110 dc->vfp_enabled = 1;
10111 dc->be_data = MO_TE;
10112 dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
10113 dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10114 regime_is_secure(env, dc->mmu_idx);
10115 dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
10116 dc->v8m_fpccr_s_wrong =
10117 FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
10118 dc->v7m_new_fp_ctxt_needed =
10119 FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
10120 dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
10121 } else {
10122 dc->be_data =
10123 FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10124 dc->debug_target_el =
10125 FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10126 dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10127 dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
10128 dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10129 dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10130 if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10131 dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10132 } else {
10133 dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10134 dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10137 dc->cp_regs = cpu->cp_regs;
10138 dc->features = env->features;
10140 /* Single step state. The code-generation logic here is:
10141 * SS_ACTIVE == 0:
10142 * generate code with no special handling for single-stepping (except
10143 * that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10144 * this happens anyway because those changes are all system register or
10145 * PSTATE writes).
10146 * SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10147 * emit code for one insn
10148 * emit code to clear PSTATE.SS
10149 * emit code to generate software step exception for completed step
10150 * end TB (as usual for having generated an exception)
10151 * SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10152 * emit code to generate a software step exception
10153 * end the TB
10155 dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
10156 dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
10157 dc->is_ldex = false;
10159 dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
10161 /* If architectural single step active, limit to 1. */
10162 if (is_singlestepping(dc)) {
10163 dc->base.max_insns = 1;
10166 /* ARM is a fixed-length ISA. Bound the number of insns to execute
10167 to those left on the page. */
10168 if (!dc->thumb) {
10169 int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10170 dc->base.max_insns = MIN(dc->base.max_insns, bound);
10173 cpu_V0 = tcg_temp_new_i64();
10174 cpu_V1 = tcg_temp_new_i64();
10175 /* FIXME: cpu_M0 can probably be the same as cpu_V0. */
10176 cpu_M0 = tcg_temp_new_i64();
10179 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
10181 DisasContext *dc = container_of(dcbase, DisasContext, base);
10183 /* A note on handling of the condexec (IT) bits:
10185 * We want to avoid the overhead of having to write the updated condexec
10186 * bits back to the CPUARMState for every instruction in an IT block. So:
10187 * (1) if the condexec bits are not already zero then we write
10188 * zero back into the CPUARMState now. This avoids complications trying
10189 * to do it at the end of the block. (For example if we don't do this
10190 * it's hard to identify whether we can safely skip writing condexec
10191 * at the end of the TB, which we definitely want to do for the case
10192 * where a TB doesn't do anything with the IT state at all.)
10193 * (2) if we are going to leave the TB then we call gen_set_condexec()
10194 * which will write the correct value into CPUARMState if zero is wrong.
10195 * This is done both for leaving the TB at the end, and for leaving
10196 * it because of an exception we know will happen, which is done in
10197 * gen_exception_insn(). The latter is necessary because we need to
10198 * leave the TB with the PC/IT state just prior to execution of the
10199 * instruction which caused the exception.
10200 * (3) if we leave the TB unexpectedly (eg a data abort on a load)
10201 * then the CPUARMState will be wrong and we need to reset it.
10202 * This is handled in the same way as restoration of the
10203 * PC in these situations; we save the value of the condexec bits
10204 * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
10205 * then uses this to restore them after an exception.
10207 * Note that there are no instructions which can read the condexec
10208 * bits, and none which can write non-static values to them, so
10209 * we don't need to care about whether CPUARMState is correct in the
10210 * middle of a TB.
10213 /* Reset the conditional execution bits immediately. This avoids
10214 complications trying to do it at the end of the block. */
10215 if (dc->condexec_mask || dc->condexec_cond) {
10216 TCGv_i32 tmp = tcg_temp_new_i32();
10217 tcg_gen_movi_i32(tmp, 0);
10218 store_cpu_field(tmp, condexec_bits);
10222 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10224 DisasContext *dc = container_of(dcbase, DisasContext, base);
10226 tcg_gen_insn_start(dc->base.pc_next,
10227 (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
10229 dc->insn_start = tcg_last_op();
10232 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
10233 const CPUBreakpoint *bp)
10235 DisasContext *dc = container_of(dcbase, DisasContext, base);
10237 if (bp->flags & BP_CPU) {
10238 gen_set_condexec(dc);
10239 gen_set_pc_im(dc, dc->base.pc_next);
10240 gen_helper_check_breakpoints(cpu_env);
10241 /* End the TB early; it's likely not going to be executed */
10242 dc->base.is_jmp = DISAS_TOO_MANY;
10243 } else {
10244 gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
10245 /* The address covered by the breakpoint must be
10246 included in [tb->pc, tb->pc + tb->size) in order
10247 to for it to be properly cleared -- thus we
10248 increment the PC here so that the logic setting
10249 tb->size below does the right thing. */
10250 /* TODO: Advance PC by correct instruction length to
10251 * avoid disassembler error messages */
10252 dc->base.pc_next += 2;
10253 dc->base.is_jmp = DISAS_NORETURN;
10256 return true;
10259 static bool arm_pre_translate_insn(DisasContext *dc)
10261 #ifdef CONFIG_USER_ONLY
10262 /* Intercept jump to the magic kernel page. */
10263 if (dc->base.pc_next >= 0xffff0000) {
10264 /* We always get here via a jump, so we know we are not in a
10265 conditional execution block. */
10266 gen_exception_internal(EXCP_KERNEL_TRAP);
10267 dc->base.is_jmp = DISAS_NORETURN;
10268 return true;
10270 #endif
10272 if (dc->ss_active && !dc->pstate_ss) {
10273 /* Singlestep state is Active-pending.
10274 * If we're in this state at the start of a TB then either
10275 * a) we just took an exception to an EL which is being debugged
10276 * and this is the first insn in the exception handler
10277 * b) debug exceptions were masked and we just unmasked them
10278 * without changing EL (eg by clearing PSTATE.D)
10279 * In either case we're going to take a swstep exception in the
10280 * "did not step an insn" case, and so the syndrome ISV and EX
10281 * bits should be zero.
10283 assert(dc->base.num_insns == 1);
10284 gen_swstep_exception(dc, 0, 0);
10285 dc->base.is_jmp = DISAS_NORETURN;
10286 return true;
10289 return false;
10292 static void arm_post_translate_insn(DisasContext *dc)
10294 if (dc->condjmp && !dc->base.is_jmp) {
10295 gen_set_label(dc->condlabel);
10296 dc->condjmp = 0;
10298 translator_loop_temp_check(&dc->base);
10301 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10303 DisasContext *dc = container_of(dcbase, DisasContext, base);
10304 CPUARMState *env = cpu->env_ptr;
10305 unsigned int insn;
10307 if (arm_pre_translate_insn(dc)) {
10308 return;
10311 dc->pc_curr = dc->base.pc_next;
10312 insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
10313 dc->insn = insn;
10314 dc->base.pc_next += 4;
10315 disas_arm_insn(dc, insn);
10317 arm_post_translate_insn(dc);
10319 /* ARM is a fixed-length ISA. We performed the cross-page check
10320 in init_disas_context by adjusting max_insns. */
10323 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
10325 /* Return true if this Thumb insn is always unconditional,
10326 * even inside an IT block. This is true of only a very few
10327 * instructions: BKPT, HLT, and SG.
10329 * A larger class of instructions are UNPREDICTABLE if used
10330 * inside an IT block; we do not need to detect those here, because
10331 * what we do by default (perform the cc check and update the IT
10332 * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
10333 * choice for those situations.
10335 * insn is either a 16-bit or a 32-bit instruction; the two are
10336 * distinguishable because for the 16-bit case the top 16 bits
10337 * are zeroes, and that isn't a valid 32-bit encoding.
10339 if ((insn & 0xffffff00) == 0xbe00) {
10340 /* BKPT */
10341 return true;
10344 if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
10345 !arm_dc_feature(s, ARM_FEATURE_M)) {
10346 /* HLT: v8A only. This is unconditional even when it is going to
10347 * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
10348 * For v7 cores this was a plain old undefined encoding and so
10349 * honours its cc check. (We might be using the encoding as
10350 * a semihosting trap, but we don't change the cc check behaviour
10351 * on that account, because a debugger connected to a real v7A
10352 * core and emulating semihosting traps by catching the UNDEF
10353 * exception would also only see cases where the cc check passed.
10354 * No guest code should be trying to do a HLT semihosting trap
10355 * in an IT block anyway.
10357 return true;
10360 if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
10361 arm_dc_feature(s, ARM_FEATURE_M)) {
10362 /* SG: v8M only */
10363 return true;
10366 return false;
10369 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10371 DisasContext *dc = container_of(dcbase, DisasContext, base);
10372 CPUARMState *env = cpu->env_ptr;
10373 uint32_t insn;
10374 bool is_16bit;
10376 if (arm_pre_translate_insn(dc)) {
10377 return;
10380 dc->pc_curr = dc->base.pc_next;
10381 insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
10382 is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
10383 dc->base.pc_next += 2;
10384 if (!is_16bit) {
10385 uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
10387 insn = insn << 16 | insn2;
10388 dc->base.pc_next += 2;
10390 dc->insn = insn;
10392 if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
10393 uint32_t cond = dc->condexec_cond;
10396 * Conditionally skip the insn. Note that both 0xe and 0xf mean
10397 * "always"; 0xf is not "never".
10399 if (cond < 0x0e) {
10400 arm_skip_unless(dc, cond);
10404 if (is_16bit) {
10405 disas_thumb_insn(dc, insn);
10406 } else {
10407 disas_thumb2_insn(dc, insn);
10410 /* Advance the Thumb condexec condition. */
10411 if (dc->condexec_mask) {
10412 dc->condexec_cond = ((dc->condexec_cond & 0xe) |
10413 ((dc->condexec_mask >> 4) & 1));
10414 dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
10415 if (dc->condexec_mask == 0) {
10416 dc->condexec_cond = 0;
10420 arm_post_translate_insn(dc);
10422 /* Thumb is a variable-length ISA. Stop translation when the next insn
10423 * will touch a new page. This ensures that prefetch aborts occur at
10424 * the right place.
10426 * We want to stop the TB if the next insn starts in a new page,
10427 * or if it spans between this page and the next. This means that
10428 * if we're looking at the last halfword in the page we need to
10429 * see if it's a 16-bit Thumb insn (which will fit in this TB)
10430 * or a 32-bit Thumb insn (which won't).
10431 * This is to avoid generating a silly TB with a single 16-bit insn
10432 * in it at the end of this page (which would execute correctly
10433 * but isn't very efficient).
10435 if (dc->base.is_jmp == DISAS_NEXT
10436 && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
10437 || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
10438 && insn_crosses_page(env, dc)))) {
10439 dc->base.is_jmp = DISAS_TOO_MANY;
10443 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10445 DisasContext *dc = container_of(dcbase, DisasContext, base);
10447 if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
10448 /* FIXME: This can theoretically happen with self-modifying code. */
10449 cpu_abort(cpu, "IO on conditional branch instruction");
10452 /* At this stage dc->condjmp will only be set when the skipped
10453 instruction was a conditional branch or trap, and the PC has
10454 already been written. */
10455 gen_set_condexec(dc);
10456 if (dc->base.is_jmp == DISAS_BX_EXCRET) {
10457 /* Exception return branches need some special case code at the
10458 * end of the TB, which is complex enough that it has to
10459 * handle the single-step vs not and the condition-failed
10460 * insn codepath itself.
10462 gen_bx_excret_final_code(dc);
10463 } else if (unlikely(is_singlestepping(dc))) {
10464 /* Unconditional and "condition passed" instruction codepath. */
10465 switch (dc->base.is_jmp) {
10466 case DISAS_SWI:
10467 gen_ss_advance(dc);
10468 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
10469 default_exception_el(dc));
10470 break;
10471 case DISAS_HVC:
10472 gen_ss_advance(dc);
10473 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
10474 break;
10475 case DISAS_SMC:
10476 gen_ss_advance(dc);
10477 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
10478 break;
10479 case DISAS_NEXT:
10480 case DISAS_TOO_MANY:
10481 case DISAS_UPDATE:
10482 gen_set_pc_im(dc, dc->base.pc_next);
10483 /* fall through */
10484 default:
10485 /* FIXME: Single stepping a WFI insn will not halt the CPU. */
10486 gen_singlestep_exception(dc);
10487 break;
10488 case DISAS_NORETURN:
10489 break;
10491 } else {
10492 /* While branches must always occur at the end of an IT block,
10493 there are a few other things that can cause us to terminate
10494 the TB in the middle of an IT block:
10495 - Exception generating instructions (bkpt, swi, undefined).
10496 - Page boundaries.
10497 - Hardware watchpoints.
10498 Hardware breakpoints have already been handled and skip this code.
10500 switch(dc->base.is_jmp) {
10501 case DISAS_NEXT:
10502 case DISAS_TOO_MANY:
10503 gen_goto_tb(dc, 1, dc->base.pc_next);
10504 break;
10505 case DISAS_JUMP:
10506 gen_goto_ptr();
10507 break;
10508 case DISAS_UPDATE:
10509 gen_set_pc_im(dc, dc->base.pc_next);
10510 /* fall through */
10511 default:
10512 /* indicate that the hash table must be used to find the next TB */
10513 tcg_gen_exit_tb(NULL, 0);
10514 break;
10515 case DISAS_NORETURN:
10516 /* nothing more to generate */
10517 break;
10518 case DISAS_WFI:
10520 TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
10521 !(dc->insn & (1U << 31))) ? 2 : 4);
10523 gen_helper_wfi(cpu_env, tmp);
10524 tcg_temp_free_i32(tmp);
10525 /* The helper doesn't necessarily throw an exception, but we
10526 * must go back to the main loop to check for interrupts anyway.
10528 tcg_gen_exit_tb(NULL, 0);
10529 break;
10531 case DISAS_WFE:
10532 gen_helper_wfe(cpu_env);
10533 break;
10534 case DISAS_YIELD:
10535 gen_helper_yield(cpu_env);
10536 break;
10537 case DISAS_SWI:
10538 gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
10539 default_exception_el(dc));
10540 break;
10541 case DISAS_HVC:
10542 gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
10543 break;
10544 case DISAS_SMC:
10545 gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
10546 break;
10550 if (dc->condjmp) {
10551 /* "Condition failed" instruction codepath for the branch/trap insn */
10552 gen_set_label(dc->condlabel);
10553 gen_set_condexec(dc);
10554 if (unlikely(is_singlestepping(dc))) {
10555 gen_set_pc_im(dc, dc->base.pc_next);
10556 gen_singlestep_exception(dc);
10557 } else {
10558 gen_goto_tb(dc, 1, dc->base.pc_next);
10563 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
10565 DisasContext *dc = container_of(dcbase, DisasContext, base);
10567 qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
10568 log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
10571 static const TranslatorOps arm_translator_ops = {
10572 .init_disas_context = arm_tr_init_disas_context,
10573 .tb_start = arm_tr_tb_start,
10574 .insn_start = arm_tr_insn_start,
10575 .breakpoint_check = arm_tr_breakpoint_check,
10576 .translate_insn = arm_tr_translate_insn,
10577 .tb_stop = arm_tr_tb_stop,
10578 .disas_log = arm_tr_disas_log,
10581 static const TranslatorOps thumb_translator_ops = {
10582 .init_disas_context = arm_tr_init_disas_context,
10583 .tb_start = arm_tr_tb_start,
10584 .insn_start = arm_tr_insn_start,
10585 .breakpoint_check = arm_tr_breakpoint_check,
10586 .translate_insn = thumb_tr_translate_insn,
10587 .tb_stop = arm_tr_tb_stop,
10588 .disas_log = arm_tr_disas_log,
10591 /* generate intermediate code for basic block 'tb'. */
10592 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
10594 DisasContext dc = { };
10595 const TranslatorOps *ops = &arm_translator_ops;
10597 if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
10598 ops = &thumb_translator_ops;
10600 #ifdef TARGET_AARCH64
10601 if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
10602 ops = &aarch64_translator_ops;
10604 #endif
10606 translator_loop(ops, &dc.base, cpu, tb, max_insns);
10609 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
10610 target_ulong *data)
10612 if (is_a64(env)) {
10613 env->pc = data[0];
10614 env->condexec_bits = 0;
10615 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
10616 } else {
10617 env->regs[15] = data[0];
10618 env->condexec_bits = data[1];
10619 env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;