target/arm/translate.c

   1 /*
   2  *  ARM translation
   3  *
   4  *  Copyright (c) 2003 Fabrice Bellard
   5  *  Copyright (c) 2005-2007 CodeSourcery
   6  *  Copyright (c) 2007 OpenedHand, Ltd.
   7  *
   8  * This library is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2 of the License, or (at your option) any later version.
  12  *
  13  * This library is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  20  */
  21 #include "qemu/osdep.h"
  22
  23 #include "cpu.h"
  24 #include "internals.h"
  25 #include "disas/disas.h"
  26 #include "exec/exec-all.h"
  27 #include "tcg/tcg-op.h"
  28 #include "tcg/tcg-op-gvec.h"
  29 #include "qemu/log.h"
  30 #include "qemu/bitops.h"
  31 #include "arm_ldst.h"
  32 #include "hw/semihosting/semihost.h"
  33
  34 #include "exec/helper-proto.h"
  35 #include "exec/helper-gen.h"
  36
  37 #include "trace-tcg.h"
  38 #include "exec/log.h"
  39
  40
  41 #define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
  42 #define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
  43 /* currently all emulated v5 cores are also v5TE, so don't bother */
  44 #define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
  45 #define ENABLE_ARCH_5J    dc_isar_feature(aa32_jazelle, s)
  46 #define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
  47 #define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
  48 #define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
  49 #define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
  50 #define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)
  51
  52 #define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
  53
  54 #include "translate.h"
  55
  56 #if defined(CONFIG_USER_ONLY)
  57 #define IS_USER(s) 1
  58 #else
  59 #define IS_USER(s) (s->user)
  60 #endif
  61
  62 /* We reuse the same 64-bit temporaries for efficiency.  */
  63 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
  64 static TCGv_i32 cpu_R[16];
  65 TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
  66 TCGv_i64 cpu_exclusive_addr;
  67 TCGv_i64 cpu_exclusive_val;
  68
  69 #include "exec/gen-icount.h"
  70
  71 static const char * const regnames[] =
  72     { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
  73       "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
  74
  75 /* Function prototypes for gen_ functions calling Neon helpers.  */
  76 typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
  77                                  TCGv_i32, TCGv_i32);
  78 /* Function prototypes for gen_ functions for fix point conversions */
  79 typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
  80
  81 /* initialize TCG globals.  */
  82 void arm_translate_init(void)
  83 {
  84     int i;
  85
  86     for (i = 0; i < 16; i++) {
  87         cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
  88                                           offsetof(CPUARMState, regs[i]),
  89                                           regnames[i]);
  90     }
  91     cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
  92     cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
  93     cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
  94     cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
  95
  96     cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
  97         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
  98     cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
  99         offsetof(CPUARMState, exclusive_val), "exclusive_val");
 100
 101     a64_translate_init();
 102 }
 103
 104 /* Flags for the disas_set_da_iss info argument:
 105  * lower bits hold the Rt register number, higher bits are flags.
 106  */
 107 typedef enum ISSInfo {
 108     ISSNone = 0,
 109     ISSRegMask = 0x1f,
 110     ISSInvalid = (1 << 5),
 111     ISSIsAcqRel = (1 << 6),
 112     ISSIsWrite = (1 << 7),
 113     ISSIs16Bit = (1 << 8),
 114 } ISSInfo;
 115
 116 /* Save the syndrome information for a Data Abort */
 117 static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
 118 {
 119     uint32_t syn;
 120     int sas = memop & MO_SIZE;
 121     bool sse = memop & MO_SIGN;
 122     bool is_acqrel = issinfo & ISSIsAcqRel;
 123     bool is_write = issinfo & ISSIsWrite;
 124     bool is_16bit = issinfo & ISSIs16Bit;
 125     int srt = issinfo & ISSRegMask;
 126
 127     if (issinfo & ISSInvalid) {
 128         /* Some callsites want to conditionally provide ISS info,
 129          * eg "only if this was not a writeback"
 130          */
 131         return;
 132     }
 133
 134     if (srt == 15) {
 135         /* For AArch32, insns where the src/dest is R15 never generate
 136          * ISS information. Catching that here saves checking at all
 137          * the call sites.
 138          */
 139         return;
 140     }
 141
 142     syn = syn_data_abort_with_iss(0, sas, sse, srt, 0, is_acqrel,
 143                                   0, 0, 0, is_write, 0, is_16bit);
 144     disas_set_insn_syndrome(s, syn);
 145 }
 146
 147 static inline int get_a32_user_mem_index(DisasContext *s)
 148 {
 149     /* Return the core mmu_idx to use for A32/T32 "unprivileged load/store"
 150      * insns:
 151      *  if PL2, UNPREDICTABLE (we choose to implement as if PL0)
 152      *  otherwise, access as if at PL0.
 153      */
 154     switch (s->mmu_idx) {
 155     case ARMMMUIdx_E2:        /* this one is UNPREDICTABLE */
 156     case ARMMMUIdx_E10_0:
 157     case ARMMMUIdx_E10_1:
 158     case ARMMMUIdx_E10_1_PAN:
 159         return arm_to_core_mmu_idx(ARMMMUIdx_E10_0);
 160     case ARMMMUIdx_SE3:
 161     case ARMMMUIdx_SE10_0:
 162     case ARMMMUIdx_SE10_1:
 163     case ARMMMUIdx_SE10_1_PAN:
 164         return arm_to_core_mmu_idx(ARMMMUIdx_SE10_0);
 165     case ARMMMUIdx_MUser:
 166     case ARMMMUIdx_MPriv:
 167         return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
 168     case ARMMMUIdx_MUserNegPri:
 169     case ARMMMUIdx_MPrivNegPri:
 170         return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
 171     case ARMMMUIdx_MSUser:
 172     case ARMMMUIdx_MSPriv:
 173         return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
 174     case ARMMMUIdx_MSUserNegPri:
 175     case ARMMMUIdx_MSPrivNegPri:
 176         return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
 177     default:
 178         g_assert_not_reached();
 179     }
 180 }
 181
 182 static inline TCGv_i32 load_cpu_offset(int offset)
 183 {
 184     TCGv_i32 tmp = tcg_temp_new_i32();
 185     tcg_gen_ld_i32(tmp, cpu_env, offset);
 186     return tmp;
 187 }
 188
 189 #define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
 190
 191 static inline void store_cpu_offset(TCGv_i32 var, int offset)
 192 {
 193     tcg_gen_st_i32(var, cpu_env, offset);
 194     tcg_temp_free_i32(var);
 195 }
 196
 197 #define store_cpu_field(var, name) \
 198     store_cpu_offset(var, offsetof(CPUARMState, name))
 199
 200 /* The architectural value of PC.  */
 201 static uint32_t read_pc(DisasContext *s)
 202 {
 203     return s->pc_curr + (s->thumb ? 4 : 8);
 204 }
 205
 206 /* Set a variable to the value of a CPU register.  */
 207 static void load_reg_var(DisasContext *s, TCGv_i32 var, int reg)
 208 {
 209     if (reg == 15) {
 210         tcg_gen_movi_i32(var, read_pc(s));
 211     } else {
 212         tcg_gen_mov_i32(var, cpu_R[reg]);
 213     }
 214 }
 215
 216 /* Create a new temporary and set it to the value of a CPU register.  */
 217 static inline TCGv_i32 load_reg(DisasContext *s, int reg)
 218 {
 219     TCGv_i32 tmp = tcg_temp_new_i32();
 220     load_reg_var(s, tmp, reg);
 221     return tmp;
 222 }
 223
 224 /*
 225  * Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
 226  * This is used for load/store for which use of PC implies (literal),
 227  * or ADD that implies ADR.
 228  */
 229 static TCGv_i32 add_reg_for_lit(DisasContext *s, int reg, int ofs)
 230 {
 231     TCGv_i32 tmp = tcg_temp_new_i32();
 232
 233     if (reg == 15) {
 234         tcg_gen_movi_i32(tmp, (read_pc(s) & ~3) + ofs);
 235     } else {
 236         tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
 237     }
 238     return tmp;
 239 }
 240
 241 /* Set a CPU register.  The source must be a temporary and will be
 242    marked as dead.  */
 243 static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
 244 {
 245     if (reg == 15) {
 246         /* In Thumb mode, we must ignore bit 0.
 247          * In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
 248          * are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
 249          * We choose to ignore [1:0] in ARM mode for all architecture versions.
 250          */
 251         tcg_gen_andi_i32(var, var, s->thumb ? ~1 : ~3);
 252         s->base.is_jmp = DISAS_JUMP;
 253     }
 254     tcg_gen_mov_i32(cpu_R[reg], var);
 255     tcg_temp_free_i32(var);
 256 }
 257
 258 /*
 259  * Variant of store_reg which applies v8M stack-limit checks before updating
 260  * SP. If the check fails this will result in an exception being taken.
 261  * We disable the stack checks for CONFIG_USER_ONLY because we have
 262  * no idea what the stack limits should be in that case.
 263  * If stack checking is not being done this just acts like store_reg().
 264  */
 265 static void store_sp_checked(DisasContext *s, TCGv_i32 var)
 266 {
 267 #ifndef CONFIG_USER_ONLY
 268     if (s->v8m_stackcheck) {
 269         gen_helper_v8m_stackcheck(cpu_env, var);
 270     }
 271 #endif
 272     store_reg(s, 13, var);
 273 }
 274
 275 /* Value extensions.  */
 276 #define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
 277 #define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
 278 #define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
 279 #define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
 280
 281 #define gen_sxtb16(var) gen_helper_sxtb16(var, var)
 282 #define gen_uxtb16(var) gen_helper_uxtb16(var, var)
 283
 284
 285 static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
 286 {
 287     TCGv_i32 tmp_mask = tcg_const_i32(mask);
 288     gen_helper_cpsr_write(cpu_env, var, tmp_mask);
 289     tcg_temp_free_i32(tmp_mask);
 290 }
 291 /* Set NZCV flags from the high 4 bits of var.  */
 292 #define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
 293
 294 static void gen_exception_internal(int excp)
 295 {
 296     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 297
 298     assert(excp_is_internal(excp));
 299     gen_helper_exception_internal(cpu_env, tcg_excp);
 300     tcg_temp_free_i32(tcg_excp);
 301 }
 302
 303 static void gen_step_complete_exception(DisasContext *s)
 304 {
 305     /* We just completed step of an insn. Move from Active-not-pending
 306      * to Active-pending, and then also take the swstep exception.
 307      * This corresponds to making the (IMPDEF) choice to prioritize
 308      * swstep exceptions over asynchronous exceptions taken to an exception
 309      * level where debug is disabled. This choice has the advantage that
 310      * we do not need to maintain internal state corresponding to the
 311      * ISV/EX syndrome bits between completion of the step and generation
 312      * of the exception, and our syndrome information is always correct.
 313      */
 314     gen_ss_advance(s);
 315     gen_swstep_exception(s, 1, s->is_ldex);
 316     s->base.is_jmp = DISAS_NORETURN;
 317 }
 318
 319 static void gen_singlestep_exception(DisasContext *s)
 320 {
 321     /* Generate the right kind of exception for singlestep, which is
 322      * either the architectural singlestep or EXCP_DEBUG for QEMU's
 323      * gdb singlestepping.
 324      */
 325     if (s->ss_active) {
 326         gen_step_complete_exception(s);
 327     } else {
 328         gen_exception_internal(EXCP_DEBUG);
 329     }
 330 }
 331
 332 static inline bool is_singlestepping(DisasContext *s)
 333 {
 334     /* Return true if we are singlestepping either because of
 335      * architectural singlestep or QEMU gdbstub singlestep. This does
 336      * not include the command line '-singlestep' mode which is rather
 337      * misnamed as it only means "one instruction per TB" and doesn't
 338      * affect the code we generate.
 339      */
 340     return s->base.singlestep_enabled || s->ss_active;
 341 }
 342
 343 static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
 344 {
 345     TCGv_i32 tmp1 = tcg_temp_new_i32();
 346     TCGv_i32 tmp2 = tcg_temp_new_i32();
 347     tcg_gen_ext16s_i32(tmp1, a);
 348     tcg_gen_ext16s_i32(tmp2, b);
 349     tcg_gen_mul_i32(tmp1, tmp1, tmp2);
 350     tcg_temp_free_i32(tmp2);
 351     tcg_gen_sari_i32(a, a, 16);
 352     tcg_gen_sari_i32(b, b, 16);
 353     tcg_gen_mul_i32(b, b, a);
 354     tcg_gen_mov_i32(a, tmp1);
 355     tcg_temp_free_i32(tmp1);
 356 }
 357
 358 /* Byteswap each halfword.  */
 359 static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
 360 {
 361     TCGv_i32 tmp = tcg_temp_new_i32();
 362     TCGv_i32 mask = tcg_const_i32(0x00ff00ff);
 363     tcg_gen_shri_i32(tmp, var, 8);
 364     tcg_gen_and_i32(tmp, tmp, mask);
 365     tcg_gen_and_i32(var, var, mask);
 366     tcg_gen_shli_i32(var, var, 8);
 367     tcg_gen_or_i32(dest, var, tmp);
 368     tcg_temp_free_i32(mask);
 369     tcg_temp_free_i32(tmp);
 370 }
 371
 372 /* Byteswap low halfword and sign extend.  */
 373 static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
 374 {
 375     tcg_gen_ext16u_i32(var, var);
 376     tcg_gen_bswap16_i32(var, var);
 377     tcg_gen_ext16s_i32(dest, var);
 378 }
 379
 380 /* 32x32->64 multiply.  Marks inputs as dead.  */
 381 static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
 382 {
 383     TCGv_i32 lo = tcg_temp_new_i32();
 384     TCGv_i32 hi = tcg_temp_new_i32();
 385     TCGv_i64 ret;
 386
 387     tcg_gen_mulu2_i32(lo, hi, a, b);
 388     tcg_temp_free_i32(a);
 389     tcg_temp_free_i32(b);
 390
 391     ret = tcg_temp_new_i64();
 392     tcg_gen_concat_i32_i64(ret, lo, hi);
 393     tcg_temp_free_i32(lo);
 394     tcg_temp_free_i32(hi);
 395
 396     return ret;
 397 }
 398
 399 static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
 400 {
 401     TCGv_i32 lo = tcg_temp_new_i32();
 402     TCGv_i32 hi = tcg_temp_new_i32();
 403     TCGv_i64 ret;
 404
 405     tcg_gen_muls2_i32(lo, hi, a, b);
 406     tcg_temp_free_i32(a);
 407     tcg_temp_free_i32(b);
 408
 409     ret = tcg_temp_new_i64();
 410     tcg_gen_concat_i32_i64(ret, lo, hi);
 411     tcg_temp_free_i32(lo);
 412     tcg_temp_free_i32(hi);
 413
 414     return ret;
 415 }
 416
 417 /* Swap low and high halfwords.  */
 418 static void gen_swap_half(TCGv_i32 var)
 419 {
 420     tcg_gen_rotri_i32(var, var, 16);
 421 }
 422
 423 /* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
 424     tmp = (t0 ^ t1) & 0x8000;
 425     t0 &= ~0x8000;
 426     t1 &= ~0x8000;
 427     t0 = (t0 + t1) ^ tmp;
 428  */
 429
 430 static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 431 {
 432     TCGv_i32 tmp = tcg_temp_new_i32();
 433     tcg_gen_xor_i32(tmp, t0, t1);
 434     tcg_gen_andi_i32(tmp, tmp, 0x8000);
 435     tcg_gen_andi_i32(t0, t0, ~0x8000);
 436     tcg_gen_andi_i32(t1, t1, ~0x8000);
 437     tcg_gen_add_i32(t0, t0, t1);
 438     tcg_gen_xor_i32(dest, t0, tmp);
 439     tcg_temp_free_i32(tmp);
 440 }
 441
 442 /* Set N and Z flags from var.  */
 443 static inline void gen_logic_CC(TCGv_i32 var)
 444 {
 445     tcg_gen_mov_i32(cpu_NF, var);
 446     tcg_gen_mov_i32(cpu_ZF, var);
 447 }
 448
 449 /* dest = T0 + T1 + CF. */
 450 static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 451 {
 452     tcg_gen_add_i32(dest, t0, t1);
 453     tcg_gen_add_i32(dest, dest, cpu_CF);
 454 }
 455
 456 /* dest = T0 - T1 + CF - 1.  */
 457 static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 458 {
 459     tcg_gen_sub_i32(dest, t0, t1);
 460     tcg_gen_add_i32(dest, dest, cpu_CF);
 461     tcg_gen_subi_i32(dest, dest, 1);
 462 }
 463
 464 /* dest = T0 + T1. Compute C, N, V and Z flags */
 465 static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 466 {
 467     TCGv_i32 tmp = tcg_temp_new_i32();
 468     tcg_gen_movi_i32(tmp, 0);
 469     tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
 470     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 471     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 472     tcg_gen_xor_i32(tmp, t0, t1);
 473     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 474     tcg_temp_free_i32(tmp);
 475     tcg_gen_mov_i32(dest, cpu_NF);
 476 }
 477
 478 /* dest = T0 + T1 + CF.  Compute C, N, V and Z flags */
 479 static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 480 {
 481     TCGv_i32 tmp = tcg_temp_new_i32();
 482     if (TCG_TARGET_HAS_add2_i32) {
 483         tcg_gen_movi_i32(tmp, 0);
 484         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
 485         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
 486     } else {
 487         TCGv_i64 q0 = tcg_temp_new_i64();
 488         TCGv_i64 q1 = tcg_temp_new_i64();
 489         tcg_gen_extu_i32_i64(q0, t0);
 490         tcg_gen_extu_i32_i64(q1, t1);
 491         tcg_gen_add_i64(q0, q0, q1);
 492         tcg_gen_extu_i32_i64(q1, cpu_CF);
 493         tcg_gen_add_i64(q0, q0, q1);
 494         tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
 495         tcg_temp_free_i64(q0);
 496         tcg_temp_free_i64(q1);
 497     }
 498     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 499     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 500     tcg_gen_xor_i32(tmp, t0, t1);
 501     tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 502     tcg_temp_free_i32(tmp);
 503     tcg_gen_mov_i32(dest, cpu_NF);
 504 }
 505
 506 /* dest = T0 - T1. Compute C, N, V and Z flags */
 507 static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 508 {
 509     TCGv_i32 tmp;
 510     tcg_gen_sub_i32(cpu_NF, t0, t1);
 511     tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 512     tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
 513     tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
 514     tmp = tcg_temp_new_i32();
 515     tcg_gen_xor_i32(tmp, t0, t1);
 516     tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 517     tcg_temp_free_i32(tmp);
 518     tcg_gen_mov_i32(dest, cpu_NF);
 519 }
 520
 521 /* dest = T0 + ~T1 + CF.  Compute C, N, V and Z flags */
 522 static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 523 {
 524     TCGv_i32 tmp = tcg_temp_new_i32();
 525     tcg_gen_not_i32(tmp, t1);
 526     gen_adc_CC(dest, t0, tmp);
 527     tcg_temp_free_i32(tmp);
 528 }
 529
 530 #define GEN_SHIFT(name)                                               \
 531 static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)       \
 532 {                                                                     \
 533     TCGv_i32 tmp1, tmp2, tmp3;                                        \
 534     tmp1 = tcg_temp_new_i32();                                        \
 535     tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
 536     tmp2 = tcg_const_i32(0);                                          \
 537     tmp3 = tcg_const_i32(0x1f);                                       \
 538     tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
 539     tcg_temp_free_i32(tmp3);                                          \
 540     tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
 541     tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
 542     tcg_temp_free_i32(tmp2);                                          \
 543     tcg_temp_free_i32(tmp1);                                          \
 544 }
 545 GEN_SHIFT(shl)
 546 GEN_SHIFT(shr)
 547 #undef GEN_SHIFT
 548
 549 static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
 550 {
 551     TCGv_i32 tmp1, tmp2;
 552     tmp1 = tcg_temp_new_i32();
 553     tcg_gen_andi_i32(tmp1, t1, 0xff);
 554     tmp2 = tcg_const_i32(0x1f);
 555     tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
 556     tcg_temp_free_i32(tmp2);
 557     tcg_gen_sar_i32(dest, t0, tmp1);
 558     tcg_temp_free_i32(tmp1);
 559 }
 560
 561 static void shifter_out_im(TCGv_i32 var, int shift)
 562 {
 563     tcg_gen_extract_i32(cpu_CF, var, shift, 1);
 564 }
 565
 566 /* Shift by immediate.  Includes special handling for shift == 0.  */
 567 static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
 568                                     int shift, int flags)
 569 {
 570     switch (shiftop) {
 571     case 0: /* LSL */
 572         if (shift != 0) {
 573             if (flags)
 574                 shifter_out_im(var, 32 - shift);
 575             tcg_gen_shli_i32(var, var, shift);
 576         }
 577         break;
 578     case 1: /* LSR */
 579         if (shift == 0) {
 580             if (flags) {
 581                 tcg_gen_shri_i32(cpu_CF, var, 31);
 582             }
 583             tcg_gen_movi_i32(var, 0);
 584         } else {
 585             if (flags)
 586                 shifter_out_im(var, shift - 1);
 587             tcg_gen_shri_i32(var, var, shift);
 588         }
 589         break;
 590     case 2: /* ASR */
 591         if (shift == 0)
 592             shift = 32;
 593         if (flags)
 594             shifter_out_im(var, shift - 1);
 595         if (shift == 32)
 596           shift = 31;
 597         tcg_gen_sari_i32(var, var, shift);
 598         break;
 599     case 3: /* ROR/RRX */
 600         if (shift != 0) {
 601             if (flags)
 602                 shifter_out_im(var, shift - 1);
 603             tcg_gen_rotri_i32(var, var, shift); break;
 604         } else {
 605             TCGv_i32 tmp = tcg_temp_new_i32();
 606             tcg_gen_shli_i32(tmp, cpu_CF, 31);
 607             if (flags)
 608                 shifter_out_im(var, 0);
 609             tcg_gen_shri_i32(var, var, 1);
 610             tcg_gen_or_i32(var, var, tmp);
 611             tcg_temp_free_i32(tmp);
 612         }
 613     }
 614 };
 615
 616 static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
 617                                      TCGv_i32 shift, int flags)
 618 {
 619     if (flags) {
 620         switch (shiftop) {
 621         case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
 622         case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
 623         case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
 624         case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
 625         }
 626     } else {
 627         switch (shiftop) {
 628         case 0:
 629             gen_shl(var, var, shift);
 630             break;
 631         case 1:
 632             gen_shr(var, var, shift);
 633             break;
 634         case 2:
 635             gen_sar(var, var, shift);
 636             break;
 637         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
 638                 tcg_gen_rotr_i32(var, var, shift); break;
 639         }
 640     }
 641     tcg_temp_free_i32(shift);
 642 }
 643
 644 /*
 645  * Generate a conditional based on ARM condition code cc.
 646  * This is common between ARM and Aarch64 targets.
 647  */
 648 void arm_test_cc(DisasCompare *cmp, int cc)
 649 {
 650     TCGv_i32 value;
 651     TCGCond cond;
 652     bool global = true;
 653
 654     switch (cc) {
 655     case 0: /* eq: Z */
 656     case 1: /* ne: !Z */
 657         cond = TCG_COND_EQ;
 658         value = cpu_ZF;
 659         break;
 660
 661     case 2: /* cs: C */
 662     case 3: /* cc: !C */
 663         cond = TCG_COND_NE;
 664         value = cpu_CF;
 665         break;
 666
 667     case 4: /* mi: N */
 668     case 5: /* pl: !N */
 669         cond = TCG_COND_LT;
 670         value = cpu_NF;
 671         break;
 672
 673     case 6: /* vs: V */
 674     case 7: /* vc: !V */
 675         cond = TCG_COND_LT;
 676         value = cpu_VF;
 677         break;
 678
 679     case 8: /* hi: C && !Z */
 680     case 9: /* ls: !C || Z -> !(C && !Z) */
 681         cond = TCG_COND_NE;
 682         value = tcg_temp_new_i32();
 683         global = false;
 684         /* CF is 1 for C, so -CF is an all-bits-set mask for C;
 685            ZF is non-zero for !Z; so AND the two subexpressions.  */
 686         tcg_gen_neg_i32(value, cpu_CF);
 687         tcg_gen_and_i32(value, value, cpu_ZF);
 688         break;
 689
 690     case 10: /* ge: N == V -> N ^ V == 0 */
 691     case 11: /* lt: N != V -> N ^ V != 0 */
 692         /* Since we're only interested in the sign bit, == 0 is >= 0.  */
 693         cond = TCG_COND_GE;
 694         value = tcg_temp_new_i32();
 695         global = false;
 696         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 697         break;
 698
 699     case 12: /* gt: !Z && N == V */
 700     case 13: /* le: Z || N != V */
 701         cond = TCG_COND_NE;
 702         value = tcg_temp_new_i32();
 703         global = false;
 704         /* (N == V) is equal to the sign bit of ~(NF ^ VF).  Propagate
 705          * the sign bit then AND with ZF to yield the result.  */
 706         tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
 707         tcg_gen_sari_i32(value, value, 31);
 708         tcg_gen_andc_i32(value, cpu_ZF, value);
 709         break;
 710
 711     case 14: /* always */
 712     case 15: /* always */
 713         /* Use the ALWAYS condition, which will fold early.
 714          * It doesn't matter what we use for the value.  */
 715         cond = TCG_COND_ALWAYS;
 716         value = cpu_ZF;
 717         goto no_invert;
 718
 719     default:
 720         fprintf(stderr, "Bad condition code 0x%x\n", cc);
 721         abort();
 722     }
 723
 724     if (cc & 1) {
 725         cond = tcg_invert_cond(cond);
 726     }
 727
 728  no_invert:
 729     cmp->cond = cond;
 730     cmp->value = value;
 731     cmp->value_global = global;
 732 }
 733
 734 void arm_free_cc(DisasCompare *cmp)
 735 {
 736     if (!cmp->value_global) {
 737         tcg_temp_free_i32(cmp->value);
 738     }
 739 }
 740
 741 void arm_jump_cc(DisasCompare *cmp, TCGLabel *label)
 742 {
 743     tcg_gen_brcondi_i32(cmp->cond, cmp->value, 0, label);
 744 }
 745
 746 void arm_gen_test_cc(int cc, TCGLabel *label)
 747 {
 748     DisasCompare cmp;
 749     arm_test_cc(&cmp, cc);
 750     arm_jump_cc(&cmp, label);
 751     arm_free_cc(&cmp);
 752 }
 753
 754 static inline void gen_set_condexec(DisasContext *s)
 755 {
 756     if (s->condexec_mask) {
 757         uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
 758         TCGv_i32 tmp = tcg_temp_new_i32();
 759         tcg_gen_movi_i32(tmp, val);
 760         store_cpu_field(tmp, condexec_bits);
 761     }
 762 }
 763
 764 static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
 765 {
 766     tcg_gen_movi_i32(cpu_R[15], val);
 767 }
 768
 769 /* Set PC and Thumb state from var.  var is marked as dead.  */
 770 static inline void gen_bx(DisasContext *s, TCGv_i32 var)
 771 {
 772     s->base.is_jmp = DISAS_JUMP;
 773     tcg_gen_andi_i32(cpu_R[15], var, ~1);
 774     tcg_gen_andi_i32(var, var, 1);
 775     store_cpu_field(var, thumb);
 776 }
 777
 778 /*
 779  * Set PC and Thumb state from var. var is marked as dead.
 780  * For M-profile CPUs, include logic to detect exception-return
 781  * branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
 782  * and BX reg, and no others, and happens only for code in Handler mode.
 783  * The Security Extension also requires us to check for the FNC_RETURN
 784  * which signals a function return from non-secure state; this can happen
 785  * in both Handler and Thread mode.
 786  * To avoid having to do multiple comparisons in inline generated code,
 787  * we make the check we do here loose, so it will match for EXC_RETURN
 788  * in Thread mode. For system emulation do_v7m_exception_exit() checks
 789  * for these spurious cases and returns without doing anything (giving
 790  * the same behaviour as for a branch to a non-magic address).
 791  *
 792  * In linux-user mode it is unclear what the right behaviour for an
 793  * attempted FNC_RETURN should be, because in real hardware this will go
 794  * directly to Secure code (ie not the Linux kernel) which will then treat
 795  * the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
 796  * attempt behave the way it would on a CPU without the security extension,
 797  * which is to say "like a normal branch". That means we can simply treat
 798  * all branches as normal with no magic address behaviour.
 799  */
 800 static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
 801 {
 802     /* Generate the same code here as for a simple bx, but flag via
 803      * s->base.is_jmp that we need to do the rest of the work later.
 804      */
 805     gen_bx(s, var);
 806 #ifndef CONFIG_USER_ONLY
 807     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) ||
 808         (s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
 809         s->base.is_jmp = DISAS_BX_EXCRET;
 810     }
 811 #endif
 812 }
 813
 814 static inline void gen_bx_excret_final_code(DisasContext *s)
 815 {
 816     /* Generate the code to finish possible exception return and end the TB */
 817     TCGLabel *excret_label = gen_new_label();
 818     uint32_t min_magic;
 819
 820     if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
 821         /* Covers FNC_RETURN and EXC_RETURN magic */
 822         min_magic = FNC_RETURN_MIN_MAGIC;
 823     } else {
 824         /* EXC_RETURN magic only */
 825         min_magic = EXC_RETURN_MIN_MAGIC;
 826     }
 827
 828     /* Is the new PC value in the magic range indicating exception return? */
 829     tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[15], min_magic, excret_label);
 830     /* No: end the TB as we would for a DISAS_JMP */
 831     if (is_singlestepping(s)) {
 832         gen_singlestep_exception(s);
 833     } else {
 834         tcg_gen_exit_tb(NULL, 0);
 835     }
 836     gen_set_label(excret_label);
 837     /* Yes: this is an exception return.
 838      * At this point in runtime env->regs[15] and env->thumb will hold
 839      * the exception-return magic number, which do_v7m_exception_exit()
 840      * will read. Nothing else will be able to see those values because
 841      * the cpu-exec main loop guarantees that we will always go straight
 842      * from raising the exception to the exception-handling code.
 843      *
 844      * gen_ss_advance(s) does nothing on M profile currently but
 845      * calling it is conceptually the right thing as we have executed
 846      * this instruction (compare SWI, HVC, SMC handling).
 847      */
 848     gen_ss_advance(s);
 849     gen_exception_internal(EXCP_EXCEPTION_EXIT);
 850 }
 851
 852 static inline void gen_bxns(DisasContext *s, int rm)
 853 {
 854     TCGv_i32 var = load_reg(s, rm);
 855
 856     /* The bxns helper may raise an EXCEPTION_EXIT exception, so in theory
 857      * we need to sync state before calling it, but:
 858      *  - we don't need to do gen_set_pc_im() because the bxns helper will
 859      *    always set the PC itself
 860      *  - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
 861      *    unless it's outside an IT block or the last insn in an IT block,
 862      *    so we know that condexec == 0 (already set at the top of the TB)
 863      *    is correct in the non-UNPREDICTABLE cases, and we can choose
 864      *    "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
 865      */
 866     gen_helper_v7m_bxns(cpu_env, var);
 867     tcg_temp_free_i32(var);
 868     s->base.is_jmp = DISAS_EXIT;
 869 }
 870
 871 static inline void gen_blxns(DisasContext *s, int rm)
 872 {
 873     TCGv_i32 var = load_reg(s, rm);
 874
 875     /* We don't need to sync condexec state, for the same reason as bxns.
 876      * We do however need to set the PC, because the blxns helper reads it.
 877      * The blxns helper may throw an exception.
 878      */
 879     gen_set_pc_im(s, s->base.pc_next);
 880     gen_helper_v7m_blxns(cpu_env, var);
 881     tcg_temp_free_i32(var);
 882     s->base.is_jmp = DISAS_EXIT;
 883 }
 884
 885 /* Variant of store_reg which uses branch&exchange logic when storing
 886    to r15 in ARM architecture v7 and above. The source must be a temporary
 887    and will be marked as dead. */
 888 static inline void store_reg_bx(DisasContext *s, int reg, TCGv_i32 var)
 889 {
 890     if (reg == 15 && ENABLE_ARCH_7) {
 891         gen_bx(s, var);
 892     } else {
 893         store_reg(s, reg, var);
 894     }
 895 }
 896
 897 /* Variant of store_reg which uses branch&exchange logic when storing
 898  * to r15 in ARM architecture v5T and above. This is used for storing
 899  * the results of a LDR/LDM/POP into r15, and corresponds to the cases
 900  * in the ARM ARM which use the LoadWritePC() pseudocode function. */
 901 static inline void store_reg_from_load(DisasContext *s, int reg, TCGv_i32 var)
 902 {
 903     if (reg == 15 && ENABLE_ARCH_5) {
 904         gen_bx_excret(s, var);
 905     } else {
 906         store_reg(s, reg, var);
 907     }
 908 }
 909
 910 #ifdef CONFIG_USER_ONLY
 911 #define IS_USER_ONLY 1
 912 #else
 913 #define IS_USER_ONLY 0
 914 #endif
 915
 916 /* Abstractions of "generate code to do a guest load/store for
 917  * AArch32", where a vaddr is always 32 bits (and is zero
 918  * extended if we're a 64 bit core) and  data is also
 919  * 32 bits unless specifically doing a 64 bit access.
 920  * These functions work like tcg_gen_qemu_{ld,st}* except
 921  * that the address argument is TCGv_i32 rather than TCGv.
 922  */
 923
 924 static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
 925 {
 926     TCGv addr = tcg_temp_new();
 927     tcg_gen_extu_i32_tl(addr, a32);
 928
 929     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 930     if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
 931         tcg_gen_xori_tl(addr, addr, 4 - (1 << (op & MO_SIZE)));
 932     }
 933     return addr;
 934 }
 935
 936 static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 937                             int index, MemOp opc)
 938 {
 939     TCGv addr;
 940
 941     if (arm_dc_feature(s, ARM_FEATURE_M) &&
 942         !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
 943         opc |= MO_ALIGN;
 944     }
 945
 946     addr = gen_aa32_addr(s, a32, opc);
 947     tcg_gen_qemu_ld_i32(val, addr, index, opc);
 948     tcg_temp_free(addr);
 949 }
 950
 951 static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
 952                             int index, MemOp opc)
 953 {
 954     TCGv addr;
 955
 956     if (arm_dc_feature(s, ARM_FEATURE_M) &&
 957         !arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
 958         opc |= MO_ALIGN;
 959     }
 960
 961     addr = gen_aa32_addr(s, a32, opc);
 962     tcg_gen_qemu_st_i32(val, addr, index, opc);
 963     tcg_temp_free(addr);
 964 }
 965
 966 #define DO_GEN_LD(SUFF, OPC)                                             \
 967 static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,      \
 968                                      TCGv_i32 a32, int index)            \
 969 {                                                                        \
 970     gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data);               \
 971 }
 972
 973 #define DO_GEN_ST(SUFF, OPC)                                             \
 974 static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val,      \
 975                                      TCGv_i32 a32, int index)            \
 976 {                                                                        \
 977     gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data);               \
 978 }
 979
 980 static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
 981 {
 982     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
 983     if (!IS_USER_ONLY && s->sctlr_b) {
 984         tcg_gen_rotri_i64(val, val, 32);
 985     }
 986 }
 987
 988 static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
 989                             int index, MemOp opc)
 990 {
 991     TCGv addr = gen_aa32_addr(s, a32, opc);
 992     tcg_gen_qemu_ld_i64(val, addr, index, opc);
 993     gen_aa32_frob64(s, val);
 994     tcg_temp_free(addr);
 995 }
 996
 997 static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
 998                                  TCGv_i32 a32, int index)
 999 {
1000     gen_aa32_ld_i64(s, val, a32, index, MO_Q | s->be_data);
1001 }
1002
1003 static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1004                             int index, MemOp opc)
1005 {
1006     TCGv addr = gen_aa32_addr(s, a32, opc);
1007
1008     /* Not needed for user-mode BE32, where we use MO_BE instead.  */
1009     if (!IS_USER_ONLY && s->sctlr_b) {
1010         TCGv_i64 tmp = tcg_temp_new_i64();
1011         tcg_gen_rotri_i64(tmp, val, 32);
1012         tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1013         tcg_temp_free_i64(tmp);
1014     } else {
1015         tcg_gen_qemu_st_i64(val, addr, index, opc);
1016     }
1017     tcg_temp_free(addr);
1018 }
1019
1020 static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1021                                  TCGv_i32 a32, int index)
1022 {
1023     gen_aa32_st_i64(s, val, a32, index, MO_Q | s->be_data);
1024 }
1025
1026 DO_GEN_LD(8u, MO_UB)
1027 DO_GEN_LD(16u, MO_UW)
1028 DO_GEN_LD(32u, MO_UL)
1029 DO_GEN_ST(8, MO_UB)
1030 DO_GEN_ST(16, MO_UW)
1031 DO_GEN_ST(32, MO_UL)
1032
1033 static inline void gen_hvc(DisasContext *s, int imm16)
1034 {
1035     /* The pre HVC helper handles cases when HVC gets trapped
1036      * as an undefined insn by runtime configuration (ie before
1037      * the insn really executes).
1038      */
1039     gen_set_pc_im(s, s->pc_curr);
1040     gen_helper_pre_hvc(cpu_env);
1041     /* Otherwise we will treat this as a real exception which
1042      * happens after execution of the insn. (The distinction matters
1043      * for the PC value reported to the exception handler and also
1044      * for single stepping.)
1045      */
1046     s->svc_imm = imm16;
1047     gen_set_pc_im(s, s->base.pc_next);
1048     s->base.is_jmp = DISAS_HVC;
1049 }
1050
1051 static inline void gen_smc(DisasContext *s)
1052 {
1053     /* As with HVC, we may take an exception either before or after
1054      * the insn executes.
1055      */
1056     TCGv_i32 tmp;
1057
1058     gen_set_pc_im(s, s->pc_curr);
1059     tmp = tcg_const_i32(syn_aa32_smc());
1060     gen_helper_pre_smc(cpu_env, tmp);
1061     tcg_temp_free_i32(tmp);
1062     gen_set_pc_im(s, s->base.pc_next);
1063     s->base.is_jmp = DISAS_SMC;
1064 }
1065
1066 static void gen_exception_internal_insn(DisasContext *s, uint32_t pc, int excp)
1067 {
1068     gen_set_condexec(s);
1069     gen_set_pc_im(s, pc);
1070     gen_exception_internal(excp);
1071     s->base.is_jmp = DISAS_NORETURN;
1072 }
1073
1074 static void gen_exception_insn(DisasContext *s, uint32_t pc, int excp,
1075                                int syn, uint32_t target_el)
1076 {
1077     gen_set_condexec(s);
1078     gen_set_pc_im(s, pc);
1079     gen_exception(excp, syn, target_el);
1080     s->base.is_jmp = DISAS_NORETURN;
1081 }
1082
1083 static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1084 {
1085     TCGv_i32 tcg_syn;
1086
1087     gen_set_condexec(s);
1088     gen_set_pc_im(s, s->pc_curr);
1089     tcg_syn = tcg_const_i32(syn);
1090     gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1091     tcg_temp_free_i32(tcg_syn);
1092     s->base.is_jmp = DISAS_NORETURN;
1093 }
1094
1095 static void unallocated_encoding(DisasContext *s)
1096 {
1097     /* Unallocated and reserved encodings are uncategorized */
1098     gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1099                        default_exception_el(s));
1100 }
1101
1102 /* Force a TB lookup after an instruction that changes the CPU state.  */
1103 static inline void gen_lookup_tb(DisasContext *s)
1104 {
1105     tcg_gen_movi_i32(cpu_R[15], s->base.pc_next);
1106     s->base.is_jmp = DISAS_EXIT;
1107 }
1108
1109 static inline void gen_hlt(DisasContext *s, int imm)
1110 {
1111     /* HLT. This has two purposes.
1112      * Architecturally, it is an external halting debug instruction.
1113      * Since QEMU doesn't implement external debug, we treat this as
1114      * it is required for halting debug disabled: it will UNDEF.
1115      * Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1116      * and "HLT 0xF000" is an A32 semihosting syscall. These traps
1117      * must trigger semihosting even for ARMv7 and earlier, where
1118      * HLT was an undefined encoding.
1119      * In system mode, we don't allow userspace access to
1120      * semihosting, to provide some semblance of security
1121      * (and for consistency with our 32-bit semihosting).
1122      */
1123     if (semihosting_enabled() &&
1124 #ifndef CONFIG_USER_ONLY
1125         s->current_el != 0 &&
1126 #endif
1127         (imm == (s->thumb ? 0x3c : 0xf000))) {
1128         gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
1129         return;
1130     }
1131
1132     unallocated_encoding(s);
1133 }
1134
1135 static TCGv_ptr get_fpstatus_ptr(int neon)
1136 {
1137     TCGv_ptr statusptr = tcg_temp_new_ptr();
1138     int offset;
1139     if (neon) {
1140         offset = offsetof(CPUARMState, vfp.standard_fp_status);
1141     } else {
1142         offset = offsetof(CPUARMState, vfp.fp_status);
1143     }
1144     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1145     return statusptr;
1146 }
1147
1148 static inline long vfp_reg_offset(bool dp, unsigned reg)
1149 {
1150     if (dp) {
1151         return offsetof(CPUARMState, vfp.zregs[reg >> 1].d[reg & 1]);
1152     } else {
1153         long ofs = offsetof(CPUARMState, vfp.zregs[reg >> 2].d[(reg >> 1) & 1]);
1154         if (reg & 1) {
1155             ofs += offsetof(CPU_DoubleU, l.upper);
1156         } else {
1157             ofs += offsetof(CPU_DoubleU, l.lower);
1158         }
1159         return ofs;
1160     }
1161 }
1162
1163 /* Return the offset of a 32-bit piece of a NEON register.
1164    zero is the least significant end of the register.  */
1165 static inline long
1166 neon_reg_offset (int reg, int n)
1167 {
1168     int sreg;
1169     sreg = reg * 2 + n;
1170     return vfp_reg_offset(0, sreg);
1171 }
1172
1173 /* Return the offset of a 2**SIZE piece of a NEON register, at index ELE,
1174  * where 0 is the least significant end of the register.
1175  */
1176 static inline long
1177 neon_element_offset(int reg, int element, MemOp size)
1178 {
1179     int element_size = 1 << size;
1180     int ofs = element * element_size;
1181 #ifdef HOST_WORDS_BIGENDIAN
1182     /* Calculate the offset assuming fully little-endian,
1183      * then XOR to account for the order of the 8-byte units.
1184      */
1185     if (element_size < 8) {
1186         ofs ^= 8 - element_size;
1187     }
1188 #endif
1189     return neon_reg_offset(reg, 0) + ofs;
1190 }
1191
1192 static TCGv_i32 neon_load_reg(int reg, int pass)
1193 {
1194     TCGv_i32 tmp = tcg_temp_new_i32();
1195     tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1196     return tmp;
1197 }
1198
1199 static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1200 {
1201     long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1202
1203     switch (mop) {
1204     case MO_UB:
1205         tcg_gen_ld8u_i32(var, cpu_env, offset);
1206         break;
1207     case MO_UW:
1208         tcg_gen_ld16u_i32(var, cpu_env, offset);
1209         break;
1210     case MO_UL:
1211         tcg_gen_ld_i32(var, cpu_env, offset);
1212         break;
1213     default:
1214         g_assert_not_reached();
1215     }
1216 }
1217
1218 static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1219 {
1220     long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1221
1222     switch (mop) {
1223     case MO_UB:
1224         tcg_gen_ld8u_i64(var, cpu_env, offset);
1225         break;
1226     case MO_UW:
1227         tcg_gen_ld16u_i64(var, cpu_env, offset);
1228         break;
1229     case MO_UL:
1230         tcg_gen_ld32u_i64(var, cpu_env, offset);
1231         break;
1232     case MO_Q:
1233         tcg_gen_ld_i64(var, cpu_env, offset);
1234         break;
1235     default:
1236         g_assert_not_reached();
1237     }
1238 }
1239
1240 static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1241 {
1242     tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1243     tcg_temp_free_i32(var);
1244 }
1245
1246 static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1247 {
1248     long offset = neon_element_offset(reg, ele, size);
1249
1250     switch (size) {
1251     case MO_8:
1252         tcg_gen_st8_i32(var, cpu_env, offset);
1253         break;
1254     case MO_16:
1255         tcg_gen_st16_i32(var, cpu_env, offset);
1256         break;
1257     case MO_32:
1258         tcg_gen_st_i32(var, cpu_env, offset);
1259         break;
1260     default:
1261         g_assert_not_reached();
1262     }
1263 }
1264
1265 static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1266 {
1267     long offset = neon_element_offset(reg, ele, size);
1268
1269     switch (size) {
1270     case MO_8:
1271         tcg_gen_st8_i64(var, cpu_env, offset);
1272         break;
1273     case MO_16:
1274         tcg_gen_st16_i64(var, cpu_env, offset);
1275         break;
1276     case MO_32:
1277         tcg_gen_st32_i64(var, cpu_env, offset);
1278         break;
1279     case MO_64:
1280         tcg_gen_st_i64(var, cpu_env, offset);
1281         break;
1282     default:
1283         g_assert_not_reached();
1284     }
1285 }
1286
1287 static inline void neon_load_reg64(TCGv_i64 var, int reg)
1288 {
1289     tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1290 }
1291
1292 static inline void neon_store_reg64(TCGv_i64 var, int reg)
1293 {
1294     tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1295 }
1296
1297 static inline void neon_load_reg32(TCGv_i32 var, int reg)
1298 {
1299     tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1300 }
1301
1302 static inline void neon_store_reg32(TCGv_i32 var, int reg)
1303 {
1304     tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1305 }
1306
1307 static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1308 {
1309     TCGv_ptr ret = tcg_temp_new_ptr();
1310     tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1311     return ret;
1312 }
1313
1314 #define ARM_CP_RW_BIT   (1 << 20)
1315
1316 /* Include the VFP and Neon decoders */
1317 #include "translate-vfp.inc.c"
1318 #include "translate-neon.inc.c"
1319
1320 static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1321 {
1322     tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1323 }
1324
1325 static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1326 {
1327     tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1328 }
1329
1330 static inline TCGv_i32 iwmmxt_load_creg(int reg)
1331 {
1332     TCGv_i32 var = tcg_temp_new_i32();
1333     tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1334     return var;
1335 }
1336
1337 static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1338 {
1339     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1340     tcg_temp_free_i32(var);
1341 }
1342
1343 static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1344 {
1345     iwmmxt_store_reg(cpu_M0, rn);
1346 }
1347
1348 static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1349 {
1350     iwmmxt_load_reg(cpu_M0, rn);
1351 }
1352
1353 static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1354 {
1355     iwmmxt_load_reg(cpu_V1, rn);
1356     tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1357 }
1358
1359 static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1360 {
1361     iwmmxt_load_reg(cpu_V1, rn);
1362     tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1363 }
1364
1365 static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1366 {
1367     iwmmxt_load_reg(cpu_V1, rn);
1368     tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1369 }
1370
1371 #define IWMMXT_OP(name) \
1372 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1373 { \
1374     iwmmxt_load_reg(cpu_V1, rn); \
1375     gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1376 }
1377
1378 #define IWMMXT_OP_ENV(name) \
1379 static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1380 { \
1381     iwmmxt_load_reg(cpu_V1, rn); \
1382     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1383 }
1384
1385 #define IWMMXT_OP_ENV_SIZE(name) \
1386 IWMMXT_OP_ENV(name##b) \
1387 IWMMXT_OP_ENV(name##w) \
1388 IWMMXT_OP_ENV(name##l)
1389
1390 #define IWMMXT_OP_ENV1(name) \
1391 static inline void gen_op_iwmmxt_##name##_M0(void) \
1392 { \
1393     gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1394 }
1395
1396 IWMMXT_OP(maddsq)
1397 IWMMXT_OP(madduq)
1398 IWMMXT_OP(sadb)
1399 IWMMXT_OP(sadw)
1400 IWMMXT_OP(mulslw)
1401 IWMMXT_OP(mulshw)
1402 IWMMXT_OP(mululw)
1403 IWMMXT_OP(muluhw)
1404 IWMMXT_OP(macsw)
1405 IWMMXT_OP(macuw)
1406
1407 IWMMXT_OP_ENV_SIZE(unpackl)
1408 IWMMXT_OP_ENV_SIZE(unpackh)
1409
1410 IWMMXT_OP_ENV1(unpacklub)
1411 IWMMXT_OP_ENV1(unpackluw)
1412 IWMMXT_OP_ENV1(unpacklul)
1413 IWMMXT_OP_ENV1(unpackhub)
1414 IWMMXT_OP_ENV1(unpackhuw)
1415 IWMMXT_OP_ENV1(unpackhul)
1416 IWMMXT_OP_ENV1(unpacklsb)
1417 IWMMXT_OP_ENV1(unpacklsw)
1418 IWMMXT_OP_ENV1(unpacklsl)
1419 IWMMXT_OP_ENV1(unpackhsb)
1420 IWMMXT_OP_ENV1(unpackhsw)
1421 IWMMXT_OP_ENV1(unpackhsl)
1422
1423 IWMMXT_OP_ENV_SIZE(cmpeq)
1424 IWMMXT_OP_ENV_SIZE(cmpgtu)
1425 IWMMXT_OP_ENV_SIZE(cmpgts)
1426
1427 IWMMXT_OP_ENV_SIZE(mins)
1428 IWMMXT_OP_ENV_SIZE(minu)
1429 IWMMXT_OP_ENV_SIZE(maxs)
1430 IWMMXT_OP_ENV_SIZE(maxu)
1431
1432 IWMMXT_OP_ENV_SIZE(subn)
1433 IWMMXT_OP_ENV_SIZE(addn)
1434 IWMMXT_OP_ENV_SIZE(subu)
1435 IWMMXT_OP_ENV_SIZE(addu)
1436 IWMMXT_OP_ENV_SIZE(subs)
1437 IWMMXT_OP_ENV_SIZE(adds)
1438
1439 IWMMXT_OP_ENV(avgb0)
1440 IWMMXT_OP_ENV(avgb1)
1441 IWMMXT_OP_ENV(avgw0)
1442 IWMMXT_OP_ENV(avgw1)
1443
1444 IWMMXT_OP_ENV(packuw)
1445 IWMMXT_OP_ENV(packul)
1446 IWMMXT_OP_ENV(packuq)
1447 IWMMXT_OP_ENV(packsw)
1448 IWMMXT_OP_ENV(packsl)
1449 IWMMXT_OP_ENV(packsq)
1450
1451 static void gen_op_iwmmxt_set_mup(void)
1452 {
1453     TCGv_i32 tmp;
1454     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455     tcg_gen_ori_i32(tmp, tmp, 2);
1456     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1457 }
1458
1459 static void gen_op_iwmmxt_set_cup(void)
1460 {
1461     TCGv_i32 tmp;
1462     tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463     tcg_gen_ori_i32(tmp, tmp, 1);
1464     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1465 }
1466
1467 static void gen_op_iwmmxt_setpsr_nz(void)
1468 {
1469     TCGv_i32 tmp = tcg_temp_new_i32();
1470     gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1471     store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1472 }
1473
1474 static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1475 {
1476     iwmmxt_load_reg(cpu_V1, rn);
1477     tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1478     tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1479 }
1480
1481 static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1482                                      TCGv_i32 dest)
1483 {
1484     int rd;
1485     uint32_t offset;
1486     TCGv_i32 tmp;
1487
1488     rd = (insn >> 16) & 0xf;
1489     tmp = load_reg(s, rd);
1490
1491     offset = (insn & 0xff) << ((insn >> 7) & 2);
1492     if (insn & (1 << 24)) {
1493         /* Pre indexed */
1494         if (insn & (1 << 23))
1495             tcg_gen_addi_i32(tmp, tmp, offset);
1496         else
1497             tcg_gen_addi_i32(tmp, tmp, -offset);
1498         tcg_gen_mov_i32(dest, tmp);
1499         if (insn & (1 << 21))
1500             store_reg(s, rd, tmp);
1501         else
1502             tcg_temp_free_i32(tmp);
1503     } else if (insn & (1 << 21)) {
1504         /* Post indexed */
1505         tcg_gen_mov_i32(dest, tmp);
1506         if (insn & (1 << 23))
1507             tcg_gen_addi_i32(tmp, tmp, offset);
1508         else
1509             tcg_gen_addi_i32(tmp, tmp, -offset);
1510         store_reg(s, rd, tmp);
1511     } else if (!(insn & (1 << 23)))
1512         return 1;
1513     return 0;
1514 }
1515
1516 static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1517 {
1518     int rd = (insn >> 0) & 0xf;
1519     TCGv_i32 tmp;
1520
1521     if (insn & (1 << 8)) {
1522         if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1523             return 1;
1524         } else {
1525             tmp = iwmmxt_load_creg(rd);
1526         }
1527     } else {
1528         tmp = tcg_temp_new_i32();
1529         iwmmxt_load_reg(cpu_V0, rd);
1530         tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1531     }
1532     tcg_gen_andi_i32(tmp, tmp, mask);
1533     tcg_gen_mov_i32(dest, tmp);
1534     tcg_temp_free_i32(tmp);
1535     return 0;
1536 }
1537
1538 /* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1539    (ie. an undefined instruction).  */
1540 static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1541 {
1542     int rd, wrd;
1543     int rdhi, rdlo, rd0, rd1, i;
1544     TCGv_i32 addr;
1545     TCGv_i32 tmp, tmp2, tmp3;
1546
1547     if ((insn & 0x0e000e00) == 0x0c000000) {
1548         if ((insn & 0x0fe00ff0) == 0x0c400000) {
1549             wrd = insn & 0xf;
1550             rdlo = (insn >> 12) & 0xf;
1551             rdhi = (insn >> 16) & 0xf;
1552             if (insn & ARM_CP_RW_BIT) {                         /* TMRRC */
1553                 iwmmxt_load_reg(cpu_V0, wrd);
1554                 tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1555                 tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1556             } else {                                    /* TMCRR */
1557                 tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1558                 iwmmxt_store_reg(cpu_V0, wrd);
1559                 gen_op_iwmmxt_set_mup();
1560             }
1561             return 0;
1562         }
1563
1564         wrd = (insn >> 12) & 0xf;
1565         addr = tcg_temp_new_i32();
1566         if (gen_iwmmxt_address(s, insn, addr)) {
1567             tcg_temp_free_i32(addr);
1568             return 1;
1569         }
1570         if (insn & ARM_CP_RW_BIT) {
1571             if ((insn >> 28) == 0xf) {                  /* WLDRW wCx */
1572                 tmp = tcg_temp_new_i32();
1573                 gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1574                 iwmmxt_store_creg(wrd, tmp);
1575             } else {
1576                 i = 1;
1577                 if (insn & (1 << 8)) {
1578                     if (insn & (1 << 22)) {             /* WLDRD */
1579                         gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1580                         i = 0;
1581                     } else {                            /* WLDRW wRd */
1582                         tmp = tcg_temp_new_i32();
1583                         gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1584                     }
1585                 } else {
1586                     tmp = tcg_temp_new_i32();
1587                     if (insn & (1 << 22)) {             /* WLDRH */
1588                         gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1589                     } else {                            /* WLDRB */
1590                         gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1591                     }
1592                 }
1593                 if (i) {
1594                     tcg_gen_extu_i32_i64(cpu_M0, tmp);
1595                     tcg_temp_free_i32(tmp);
1596                 }
1597                 gen_op_iwmmxt_movq_wRn_M0(wrd);
1598             }
1599         } else {
1600             if ((insn >> 28) == 0xf) {                  /* WSTRW wCx */
1601                 tmp = iwmmxt_load_creg(wrd);
1602                 gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1603             } else {
1604                 gen_op_iwmmxt_movq_M0_wRn(wrd);
1605                 tmp = tcg_temp_new_i32();
1606                 if (insn & (1 << 8)) {
1607                     if (insn & (1 << 22)) {             /* WSTRD */
1608                         gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1609                     } else {                            /* WSTRW wRd */
1610                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1611                         gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1612                     }
1613                 } else {
1614                     if (insn & (1 << 22)) {             /* WSTRH */
1615                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1616                         gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1617                     } else {                            /* WSTRB */
1618                         tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1619                         gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1620                     }
1621                 }
1622             }
1623             tcg_temp_free_i32(tmp);
1624         }
1625         tcg_temp_free_i32(addr);
1626         return 0;
1627     }
1628
1629     if ((insn & 0x0f000000) != 0x0e000000)
1630         return 1;
1631
1632     switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1633     case 0x000:                                                 /* WOR */
1634         wrd = (insn >> 12) & 0xf;
1635         rd0 = (insn >> 0) & 0xf;
1636         rd1 = (insn >> 16) & 0xf;
1637         gen_op_iwmmxt_movq_M0_wRn(rd0);
1638         gen_op_iwmmxt_orq_M0_wRn(rd1);
1639         gen_op_iwmmxt_setpsr_nz();
1640         gen_op_iwmmxt_movq_wRn_M0(wrd);
1641         gen_op_iwmmxt_set_mup();
1642         gen_op_iwmmxt_set_cup();
1643         break;
1644     case 0x011:                                                 /* TMCR */
1645         if (insn & 0xf)
1646             return 1;
1647         rd = (insn >> 12) & 0xf;
1648         wrd = (insn >> 16) & 0xf;
1649         switch (wrd) {
1650         case ARM_IWMMXT_wCID:
1651         case ARM_IWMMXT_wCASF:
1652             break;
1653         case ARM_IWMMXT_wCon:
1654             gen_op_iwmmxt_set_cup();
1655             /* Fall through.  */
1656         case ARM_IWMMXT_wCSSF:
1657             tmp = iwmmxt_load_creg(wrd);
1658             tmp2 = load_reg(s, rd);
1659             tcg_gen_andc_i32(tmp, tmp, tmp2);
1660             tcg_temp_free_i32(tmp2);
1661             iwmmxt_store_creg(wrd, tmp);
1662             break;
1663         case ARM_IWMMXT_wCGR0:
1664         case ARM_IWMMXT_wCGR1:
1665         case ARM_IWMMXT_wCGR2:
1666         case ARM_IWMMXT_wCGR3:
1667             gen_op_iwmmxt_set_cup();
1668             tmp = load_reg(s, rd);
1669             iwmmxt_store_creg(wrd, tmp);
1670             break;
1671         default:
1672             return 1;
1673         }
1674         break;
1675     case 0x100:                                                 /* WXOR */
1676         wrd = (insn >> 12) & 0xf;
1677         rd0 = (insn >> 0) & 0xf;
1678         rd1 = (insn >> 16) & 0xf;
1679         gen_op_iwmmxt_movq_M0_wRn(rd0);
1680         gen_op_iwmmxt_xorq_M0_wRn(rd1);
1681         gen_op_iwmmxt_setpsr_nz();
1682         gen_op_iwmmxt_movq_wRn_M0(wrd);
1683         gen_op_iwmmxt_set_mup();
1684         gen_op_iwmmxt_set_cup();
1685         break;
1686     case 0x111:                                                 /* TMRC */
1687         if (insn & 0xf)
1688             return 1;
1689         rd = (insn >> 12) & 0xf;
1690         wrd = (insn >> 16) & 0xf;
1691         tmp = iwmmxt_load_creg(wrd);
1692         store_reg(s, rd, tmp);
1693         break;
1694     case 0x300:                                                 /* WANDN */
1695         wrd = (insn >> 12) & 0xf;
1696         rd0 = (insn >> 0) & 0xf;
1697         rd1 = (insn >> 16) & 0xf;
1698         gen_op_iwmmxt_movq_M0_wRn(rd0);
1699         tcg_gen_neg_i64(cpu_M0, cpu_M0);
1700         gen_op_iwmmxt_andq_M0_wRn(rd1);
1701         gen_op_iwmmxt_setpsr_nz();
1702         gen_op_iwmmxt_movq_wRn_M0(wrd);
1703         gen_op_iwmmxt_set_mup();
1704         gen_op_iwmmxt_set_cup();
1705         break;
1706     case 0x200:                                                 /* WAND */
1707         wrd = (insn >> 12) & 0xf;
1708         rd0 = (insn >> 0) & 0xf;
1709         rd1 = (insn >> 16) & 0xf;
1710         gen_op_iwmmxt_movq_M0_wRn(rd0);
1711         gen_op_iwmmxt_andq_M0_wRn(rd1);
1712         gen_op_iwmmxt_setpsr_nz();
1713         gen_op_iwmmxt_movq_wRn_M0(wrd);
1714         gen_op_iwmmxt_set_mup();
1715         gen_op_iwmmxt_set_cup();
1716         break;
1717     case 0x810: case 0xa10:                             /* WMADD */
1718         wrd = (insn >> 12) & 0xf;
1719         rd0 = (insn >> 0) & 0xf;
1720         rd1 = (insn >> 16) & 0xf;
1721         gen_op_iwmmxt_movq_M0_wRn(rd0);
1722         if (insn & (1 << 21))
1723             gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1724         else
1725             gen_op_iwmmxt_madduq_M0_wRn(rd1);
1726         gen_op_iwmmxt_movq_wRn_M0(wrd);
1727         gen_op_iwmmxt_set_mup();
1728         break;
1729     case 0x10e: case 0x50e: case 0x90e: case 0xd0e:     /* WUNPCKIL */
1730         wrd = (insn >> 12) & 0xf;
1731         rd0 = (insn >> 16) & 0xf;
1732         rd1 = (insn >> 0) & 0xf;
1733         gen_op_iwmmxt_movq_M0_wRn(rd0);
1734         switch ((insn >> 22) & 3) {
1735         case 0:
1736             gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1737             break;
1738         case 1:
1739             gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1740             break;
1741         case 2:
1742             gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1743             break;
1744         case 3:
1745             return 1;
1746         }
1747         gen_op_iwmmxt_movq_wRn_M0(wrd);
1748         gen_op_iwmmxt_set_mup();
1749         gen_op_iwmmxt_set_cup();
1750         break;
1751     case 0x10c: case 0x50c: case 0x90c: case 0xd0c:     /* WUNPCKIH */
1752         wrd = (insn >> 12) & 0xf;
1753         rd0 = (insn >> 16) & 0xf;
1754         rd1 = (insn >> 0) & 0xf;
1755         gen_op_iwmmxt_movq_M0_wRn(rd0);
1756         switch ((insn >> 22) & 3) {
1757         case 0:
1758             gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1759             break;
1760         case 1:
1761             gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1762             break;
1763         case 2:
1764             gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1765             break;
1766         case 3:
1767             return 1;
1768         }
1769         gen_op_iwmmxt_movq_wRn_M0(wrd);
1770         gen_op_iwmmxt_set_mup();
1771         gen_op_iwmmxt_set_cup();
1772         break;
1773     case 0x012: case 0x112: case 0x412: case 0x512:     /* WSAD */
1774         wrd = (insn >> 12) & 0xf;
1775         rd0 = (insn >> 16) & 0xf;
1776         rd1 = (insn >> 0) & 0xf;
1777         gen_op_iwmmxt_movq_M0_wRn(rd0);
1778         if (insn & (1 << 22))
1779             gen_op_iwmmxt_sadw_M0_wRn(rd1);
1780         else
1781             gen_op_iwmmxt_sadb_M0_wRn(rd1);
1782         if (!(insn & (1 << 20)))
1783             gen_op_iwmmxt_addl_M0_wRn(wrd);
1784         gen_op_iwmmxt_movq_wRn_M0(wrd);
1785         gen_op_iwmmxt_set_mup();
1786         break;
1787     case 0x010: case 0x110: case 0x210: case 0x310:     /* WMUL */
1788         wrd = (insn >> 12) & 0xf;
1789         rd0 = (insn >> 16) & 0xf;
1790         rd1 = (insn >> 0) & 0xf;
1791         gen_op_iwmmxt_movq_M0_wRn(rd0);
1792         if (insn & (1 << 21)) {
1793             if (insn & (1 << 20))
1794                 gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1795             else
1796                 gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1797         } else {
1798             if (insn & (1 << 20))
1799                 gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1800             else
1801                 gen_op_iwmmxt_mululw_M0_wRn(rd1);
1802         }
1803         gen_op_iwmmxt_movq_wRn_M0(wrd);
1804         gen_op_iwmmxt_set_mup();
1805         break;
1806     case 0x410: case 0x510: case 0x610: case 0x710:     /* WMAC */
1807         wrd = (insn >> 12) & 0xf;
1808         rd0 = (insn >> 16) & 0xf;
1809         rd1 = (insn >> 0) & 0xf;
1810         gen_op_iwmmxt_movq_M0_wRn(rd0);
1811         if (insn & (1 << 21))
1812             gen_op_iwmmxt_macsw_M0_wRn(rd1);
1813         else
1814             gen_op_iwmmxt_macuw_M0_wRn(rd1);
1815         if (!(insn & (1 << 20))) {
1816             iwmmxt_load_reg(cpu_V1, wrd);
1817             tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1818         }
1819         gen_op_iwmmxt_movq_wRn_M0(wrd);
1820         gen_op_iwmmxt_set_mup();
1821         break;
1822     case 0x006: case 0x406: case 0x806: case 0xc06:     /* WCMPEQ */
1823         wrd = (insn >> 12) & 0xf;
1824         rd0 = (insn >> 16) & 0xf;
1825         rd1 = (insn >> 0) & 0xf;
1826         gen_op_iwmmxt_movq_M0_wRn(rd0);
1827         switch ((insn >> 22) & 3) {
1828         case 0:
1829             gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1830             break;
1831         case 1:
1832             gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1833             break;
1834         case 2:
1835             gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1836             break;
1837         case 3:
1838             return 1;
1839         }
1840         gen_op_iwmmxt_movq_wRn_M0(wrd);
1841         gen_op_iwmmxt_set_mup();
1842         gen_op_iwmmxt_set_cup();
1843         break;
1844     case 0x800: case 0x900: case 0xc00: case 0xd00:     /* WAVG2 */
1845         wrd = (insn >> 12) & 0xf;
1846         rd0 = (insn >> 16) & 0xf;
1847         rd1 = (insn >> 0) & 0xf;
1848         gen_op_iwmmxt_movq_M0_wRn(rd0);
1849         if (insn & (1 << 22)) {
1850             if (insn & (1 << 20))
1851                 gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1852             else
1853                 gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1854         } else {
1855             if (insn & (1 << 20))
1856                 gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1857             else
1858                 gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1859         }
1860         gen_op_iwmmxt_movq_wRn_M0(wrd);
1861         gen_op_iwmmxt_set_mup();
1862         gen_op_iwmmxt_set_cup();
1863         break;
1864     case 0x802: case 0x902: case 0xa02: case 0xb02:     /* WALIGNR */
1865         wrd = (insn >> 12) & 0xf;
1866         rd0 = (insn >> 16) & 0xf;
1867         rd1 = (insn >> 0) & 0xf;
1868         gen_op_iwmmxt_movq_M0_wRn(rd0);
1869         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1870         tcg_gen_andi_i32(tmp, tmp, 7);
1871         iwmmxt_load_reg(cpu_V1, rd1);
1872         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1873         tcg_temp_free_i32(tmp);
1874         gen_op_iwmmxt_movq_wRn_M0(wrd);
1875         gen_op_iwmmxt_set_mup();
1876         break;
1877     case 0x601: case 0x605: case 0x609: case 0x60d:     /* TINSR */
1878         if (((insn >> 6) & 3) == 3)
1879             return 1;
1880         rd = (insn >> 12) & 0xf;
1881         wrd = (insn >> 16) & 0xf;
1882         tmp = load_reg(s, rd);
1883         gen_op_iwmmxt_movq_M0_wRn(wrd);
1884         switch ((insn >> 6) & 3) {
1885         case 0:
1886             tmp2 = tcg_const_i32(0xff);
1887             tmp3 = tcg_const_i32((insn & 7) << 3);
1888             break;
1889         case 1:
1890             tmp2 = tcg_const_i32(0xffff);
1891             tmp3 = tcg_const_i32((insn & 3) << 4);
1892             break;
1893         case 2:
1894             tmp2 = tcg_const_i32(0xffffffff);
1895             tmp3 = tcg_const_i32((insn & 1) << 5);
1896             break;
1897         default:
1898             tmp2 = NULL;
1899             tmp3 = NULL;
1900         }
1901         gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1902         tcg_temp_free_i32(tmp3);
1903         tcg_temp_free_i32(tmp2);
1904         tcg_temp_free_i32(tmp);
1905         gen_op_iwmmxt_movq_wRn_M0(wrd);
1906         gen_op_iwmmxt_set_mup();
1907         break;
1908     case 0x107: case 0x507: case 0x907: case 0xd07:     /* TEXTRM */
1909         rd = (insn >> 12) & 0xf;
1910         wrd = (insn >> 16) & 0xf;
1911         if (rd == 15 || ((insn >> 22) & 3) == 3)
1912             return 1;
1913         gen_op_iwmmxt_movq_M0_wRn(wrd);
1914         tmp = tcg_temp_new_i32();
1915         switch ((insn >> 22) & 3) {
1916         case 0:
1917             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1918             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1919             if (insn & 8) {
1920                 tcg_gen_ext8s_i32(tmp, tmp);
1921             } else {
1922                 tcg_gen_andi_i32(tmp, tmp, 0xff);
1923             }
1924             break;
1925         case 1:
1926             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1927             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1928             if (insn & 8) {
1929                 tcg_gen_ext16s_i32(tmp, tmp);
1930             } else {
1931                 tcg_gen_andi_i32(tmp, tmp, 0xffff);
1932             }
1933             break;
1934         case 2:
1935             tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1936             tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1937             break;
1938         }
1939         store_reg(s, rd, tmp);
1940         break;
1941     case 0x117: case 0x517: case 0x917: case 0xd17:     /* TEXTRC */
1942         if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1943             return 1;
1944         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1945         switch ((insn >> 22) & 3) {
1946         case 0:
1947             tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1948             break;
1949         case 1:
1950             tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1951             break;
1952         case 2:
1953             tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1954             break;
1955         }
1956         tcg_gen_shli_i32(tmp, tmp, 28);
1957         gen_set_nzcv(tmp);
1958         tcg_temp_free_i32(tmp);
1959         break;
1960     case 0x401: case 0x405: case 0x409: case 0x40d:     /* TBCST */
1961         if (((insn >> 6) & 3) == 3)
1962             return 1;
1963         rd = (insn >> 12) & 0xf;
1964         wrd = (insn >> 16) & 0xf;
1965         tmp = load_reg(s, rd);
1966         switch ((insn >> 6) & 3) {
1967         case 0:
1968             gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1969             break;
1970         case 1:
1971             gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1972             break;
1973         case 2:
1974             gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1975             break;
1976         }
1977         tcg_temp_free_i32(tmp);
1978         gen_op_iwmmxt_movq_wRn_M0(wrd);
1979         gen_op_iwmmxt_set_mup();
1980         break;
1981     case 0x113: case 0x513: case 0x913: case 0xd13:     /* TANDC */
1982         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1983             return 1;
1984         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1985         tmp2 = tcg_temp_new_i32();
1986         tcg_gen_mov_i32(tmp2, tmp);
1987         switch ((insn >> 22) & 3) {
1988         case 0:
1989             for (i = 0; i < 7; i ++) {
1990                 tcg_gen_shli_i32(tmp2, tmp2, 4);
1991                 tcg_gen_and_i32(tmp, tmp, tmp2);
1992             }
1993             break;
1994         case 1:
1995             for (i = 0; i < 3; i ++) {
1996                 tcg_gen_shli_i32(tmp2, tmp2, 8);
1997                 tcg_gen_and_i32(tmp, tmp, tmp2);
1998             }
1999             break;
2000         case 2:
2001             tcg_gen_shli_i32(tmp2, tmp2, 16);
2002             tcg_gen_and_i32(tmp, tmp, tmp2);
2003             break;
2004         }
2005         gen_set_nzcv(tmp);
2006         tcg_temp_free_i32(tmp2);
2007         tcg_temp_free_i32(tmp);
2008         break;
2009     case 0x01c: case 0x41c: case 0x81c: case 0xc1c:     /* WACC */
2010         wrd = (insn >> 12) & 0xf;
2011         rd0 = (insn >> 16) & 0xf;
2012         gen_op_iwmmxt_movq_M0_wRn(rd0);
2013         switch ((insn >> 22) & 3) {
2014         case 0:
2015             gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2016             break;
2017         case 1:
2018             gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2019             break;
2020         case 2:
2021             gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2022             break;
2023         case 3:
2024             return 1;
2025         }
2026         gen_op_iwmmxt_movq_wRn_M0(wrd);
2027         gen_op_iwmmxt_set_mup();
2028         break;
2029     case 0x115: case 0x515: case 0x915: case 0xd15:     /* TORC */
2030         if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
2031             return 1;
2032         tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2033         tmp2 = tcg_temp_new_i32();
2034         tcg_gen_mov_i32(tmp2, tmp);
2035         switch ((insn >> 22) & 3) {
2036         case 0:
2037             for (i = 0; i < 7; i ++) {
2038                 tcg_gen_shli_i32(tmp2, tmp2, 4);
2039                 tcg_gen_or_i32(tmp, tmp, tmp2);
2040             }
2041             break;
2042         case 1:
2043             for (i = 0; i < 3; i ++) {
2044                 tcg_gen_shli_i32(tmp2, tmp2, 8);
2045                 tcg_gen_or_i32(tmp, tmp, tmp2);
2046             }
2047             break;
2048         case 2:
2049             tcg_gen_shli_i32(tmp2, tmp2, 16);
2050             tcg_gen_or_i32(tmp, tmp, tmp2);
2051             break;
2052         }
2053         gen_set_nzcv(tmp);
2054         tcg_temp_free_i32(tmp2);
2055         tcg_temp_free_i32(tmp);
2056         break;
2057     case 0x103: case 0x503: case 0x903: case 0xd03:     /* TMOVMSK */
2058         rd = (insn >> 12) & 0xf;
2059         rd0 = (insn >> 16) & 0xf;
2060         if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
2061             return 1;
2062         gen_op_iwmmxt_movq_M0_wRn(rd0);
2063         tmp = tcg_temp_new_i32();
2064         switch ((insn >> 22) & 3) {
2065         case 0:
2066             gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2067             break;
2068         case 1:
2069             gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2070             break;
2071         case 2:
2072             gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2073             break;
2074         }
2075         store_reg(s, rd, tmp);
2076         break;
2077     case 0x106: case 0x306: case 0x506: case 0x706:     /* WCMPGT */
2078     case 0x906: case 0xb06: case 0xd06: case 0xf06:
2079         wrd = (insn >> 12) & 0xf;
2080         rd0 = (insn >> 16) & 0xf;
2081         rd1 = (insn >> 0) & 0xf;
2082         gen_op_iwmmxt_movq_M0_wRn(rd0);
2083         switch ((insn >> 22) & 3) {
2084         case 0:
2085             if (insn & (1 << 21))
2086                 gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2087             else
2088                 gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2089             break;
2090         case 1:
2091             if (insn & (1 << 21))
2092                 gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2093             else
2094                 gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2095             break;
2096         case 2:
2097             if (insn & (1 << 21))
2098                 gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2099             else
2100                 gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2101             break;
2102         case 3:
2103             return 1;
2104         }
2105         gen_op_iwmmxt_movq_wRn_M0(wrd);
2106         gen_op_iwmmxt_set_mup();
2107         gen_op_iwmmxt_set_cup();
2108         break;
2109     case 0x00e: case 0x20e: case 0x40e: case 0x60e:     /* WUNPCKEL */
2110     case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2111         wrd = (insn >> 12) & 0xf;
2112         rd0 = (insn >> 16) & 0xf;
2113         gen_op_iwmmxt_movq_M0_wRn(rd0);
2114         switch ((insn >> 22) & 3) {
2115         case 0:
2116             if (insn & (1 << 21))
2117                 gen_op_iwmmxt_unpacklsb_M0();
2118             else
2119                 gen_op_iwmmxt_unpacklub_M0();
2120             break;
2121         case 1:
2122             if (insn & (1 << 21))
2123                 gen_op_iwmmxt_unpacklsw_M0();
2124             else
2125                 gen_op_iwmmxt_unpackluw_M0();
2126             break;
2127         case 2:
2128             if (insn & (1 << 21))
2129                 gen_op_iwmmxt_unpacklsl_M0();
2130             else
2131                 gen_op_iwmmxt_unpacklul_M0();
2132             break;
2133         case 3:
2134             return 1;
2135         }
2136         gen_op_iwmmxt_movq_wRn_M0(wrd);
2137         gen_op_iwmmxt_set_mup();
2138         gen_op_iwmmxt_set_cup();
2139         break;
2140     case 0x00c: case 0x20c: case 0x40c: case 0x60c:     /* WUNPCKEH */
2141     case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2142         wrd = (insn >> 12) & 0xf;
2143         rd0 = (insn >> 16) & 0xf;
2144         gen_op_iwmmxt_movq_M0_wRn(rd0);
2145         switch ((insn >> 22) & 3) {
2146         case 0:
2147             if (insn & (1 << 21))
2148                 gen_op_iwmmxt_unpackhsb_M0();
2149             else
2150                 gen_op_iwmmxt_unpackhub_M0();
2151             break;
2152         case 1:
2153             if (insn & (1 << 21))
2154                 gen_op_iwmmxt_unpackhsw_M0();
2155             else
2156                 gen_op_iwmmxt_unpackhuw_M0();
2157             break;
2158         case 2:
2159             if (insn & (1 << 21))
2160                 gen_op_iwmmxt_unpackhsl_M0();
2161             else
2162                 gen_op_iwmmxt_unpackhul_M0();
2163             break;
2164         case 3:
2165             return 1;
2166         }
2167         gen_op_iwmmxt_movq_wRn_M0(wrd);
2168         gen_op_iwmmxt_set_mup();
2169         gen_op_iwmmxt_set_cup();
2170         break;
2171     case 0x204: case 0x604: case 0xa04: case 0xe04:     /* WSRL */
2172     case 0x214: case 0x614: case 0xa14: case 0xe14:
2173         if (((insn >> 22) & 3) == 0)
2174             return 1;
2175         wrd = (insn >> 12) & 0xf;
2176         rd0 = (insn >> 16) & 0xf;
2177         gen_op_iwmmxt_movq_M0_wRn(rd0);
2178         tmp = tcg_temp_new_i32();
2179         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2180             tcg_temp_free_i32(tmp);
2181             return 1;
2182         }
2183         switch ((insn >> 22) & 3) {
2184         case 1:
2185             gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2186             break;
2187         case 2:
2188             gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2189             break;
2190         case 3:
2191             gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2192             break;
2193         }
2194         tcg_temp_free_i32(tmp);
2195         gen_op_iwmmxt_movq_wRn_M0(wrd);
2196         gen_op_iwmmxt_set_mup();
2197         gen_op_iwmmxt_set_cup();
2198         break;
2199     case 0x004: case 0x404: case 0x804: case 0xc04:     /* WSRA */
2200     case 0x014: case 0x414: case 0x814: case 0xc14:
2201         if (((insn >> 22) & 3) == 0)
2202             return 1;
2203         wrd = (insn >> 12) & 0xf;
2204         rd0 = (insn >> 16) & 0xf;
2205         gen_op_iwmmxt_movq_M0_wRn(rd0);
2206         tmp = tcg_temp_new_i32();
2207         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2208             tcg_temp_free_i32(tmp);
2209             return 1;
2210         }
2211         switch ((insn >> 22) & 3) {
2212         case 1:
2213             gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2214             break;
2215         case 2:
2216             gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2217             break;
2218         case 3:
2219             gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2220             break;
2221         }
2222         tcg_temp_free_i32(tmp);
2223         gen_op_iwmmxt_movq_wRn_M0(wrd);
2224         gen_op_iwmmxt_set_mup();
2225         gen_op_iwmmxt_set_cup();
2226         break;
2227     case 0x104: case 0x504: case 0x904: case 0xd04:     /* WSLL */
2228     case 0x114: case 0x514: case 0x914: case 0xd14:
2229         if (((insn >> 22) & 3) == 0)
2230             return 1;
2231         wrd = (insn >> 12) & 0xf;
2232         rd0 = (insn >> 16) & 0xf;
2233         gen_op_iwmmxt_movq_M0_wRn(rd0);
2234         tmp = tcg_temp_new_i32();
2235         if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2236             tcg_temp_free_i32(tmp);
2237             return 1;
2238         }
2239         switch ((insn >> 22) & 3) {
2240         case 1:
2241             gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2242             break;
2243         case 2:
2244             gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2245             break;
2246         case 3:
2247             gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2248             break;
2249         }
2250         tcg_temp_free_i32(tmp);
2251         gen_op_iwmmxt_movq_wRn_M0(wrd);
2252         gen_op_iwmmxt_set_mup();
2253         gen_op_iwmmxt_set_cup();
2254         break;
2255     case 0x304: case 0x704: case 0xb04: case 0xf04:     /* WROR */
2256     case 0x314: case 0x714: case 0xb14: case 0xf14:
2257         if (((insn >> 22) & 3) == 0)
2258             return 1;
2259         wrd = (insn >> 12) & 0xf;
2260         rd0 = (insn >> 16) & 0xf;
2261         gen_op_iwmmxt_movq_M0_wRn(rd0);
2262         tmp = tcg_temp_new_i32();
2263         switch ((insn >> 22) & 3) {
2264         case 1:
2265             if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2266                 tcg_temp_free_i32(tmp);
2267                 return 1;
2268             }
2269             gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2270             break;
2271         case 2:
2272             if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2273                 tcg_temp_free_i32(tmp);
2274                 return 1;
2275             }
2276             gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2277             break;
2278         case 3:
2279             if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2280                 tcg_temp_free_i32(tmp);
2281                 return 1;
2282             }
2283             gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2284             break;
2285         }
2286         tcg_temp_free_i32(tmp);
2287         gen_op_iwmmxt_movq_wRn_M0(wrd);
2288         gen_op_iwmmxt_set_mup();
2289         gen_op_iwmmxt_set_cup();
2290         break;
2291     case 0x116: case 0x316: case 0x516: case 0x716:     /* WMIN */
2292     case 0x916: case 0xb16: case 0xd16: case 0xf16:
2293         wrd = (insn >> 12) & 0xf;
2294         rd0 = (insn >> 16) & 0xf;
2295         rd1 = (insn >> 0) & 0xf;
2296         gen_op_iwmmxt_movq_M0_wRn(rd0);
2297         switch ((insn >> 22) & 3) {
2298         case 0:
2299             if (insn & (1 << 21))
2300                 gen_op_iwmmxt_minsb_M0_wRn(rd1);
2301             else
2302                 gen_op_iwmmxt_minub_M0_wRn(rd1);
2303             break;
2304         case 1:
2305             if (insn & (1 << 21))
2306                 gen_op_iwmmxt_minsw_M0_wRn(rd1);
2307             else
2308                 gen_op_iwmmxt_minuw_M0_wRn(rd1);
2309             break;
2310         case 2:
2311             if (insn & (1 << 21))
2312                 gen_op_iwmmxt_minsl_M0_wRn(rd1);
2313             else
2314                 gen_op_iwmmxt_minul_M0_wRn(rd1);
2315             break;
2316         case 3:
2317             return 1;
2318         }
2319         gen_op_iwmmxt_movq_wRn_M0(wrd);
2320         gen_op_iwmmxt_set_mup();
2321         break;
2322     case 0x016: case 0x216: case 0x416: case 0x616:     /* WMAX */
2323     case 0x816: case 0xa16: case 0xc16: case 0xe16:
2324         wrd = (insn >> 12) & 0xf;
2325         rd0 = (insn >> 16) & 0xf;
2326         rd1 = (insn >> 0) & 0xf;
2327         gen_op_iwmmxt_movq_M0_wRn(rd0);
2328         switch ((insn >> 22) & 3) {
2329         case 0:
2330             if (insn & (1 << 21))
2331                 gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2332             else
2333                 gen_op_iwmmxt_maxub_M0_wRn(rd1);
2334             break;
2335         case 1:
2336             if (insn & (1 << 21))
2337                 gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2338             else
2339                 gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2340             break;
2341         case 2:
2342             if (insn & (1 << 21))
2343                 gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2344             else
2345                 gen_op_iwmmxt_maxul_M0_wRn(rd1);
2346             break;
2347         case 3:
2348             return 1;
2349         }
2350         gen_op_iwmmxt_movq_wRn_M0(wrd);
2351         gen_op_iwmmxt_set_mup();
2352         break;
2353     case 0x002: case 0x102: case 0x202: case 0x302:     /* WALIGNI */
2354     case 0x402: case 0x502: case 0x602: case 0x702:
2355         wrd = (insn >> 12) & 0xf;
2356         rd0 = (insn >> 16) & 0xf;
2357         rd1 = (insn >> 0) & 0xf;
2358         gen_op_iwmmxt_movq_M0_wRn(rd0);
2359         tmp = tcg_const_i32((insn >> 20) & 3);
2360         iwmmxt_load_reg(cpu_V1, rd1);
2361         gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2362         tcg_temp_free_i32(tmp);
2363         gen_op_iwmmxt_movq_wRn_M0(wrd);
2364         gen_op_iwmmxt_set_mup();
2365         break;
2366     case 0x01a: case 0x11a: case 0x21a: case 0x31a:     /* WSUB */
2367     case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2368     case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2369     case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2370         wrd = (insn >> 12) & 0xf;
2371         rd0 = (insn >> 16) & 0xf;
2372         rd1 = (insn >> 0) & 0xf;
2373         gen_op_iwmmxt_movq_M0_wRn(rd0);
2374         switch ((insn >> 20) & 0xf) {
2375         case 0x0:
2376             gen_op_iwmmxt_subnb_M0_wRn(rd1);
2377             break;
2378         case 0x1:
2379             gen_op_iwmmxt_subub_M0_wRn(rd1);
2380             break;
2381         case 0x3:
2382             gen_op_iwmmxt_subsb_M0_wRn(rd1);
2383             break;
2384         case 0x4:
2385             gen_op_iwmmxt_subnw_M0_wRn(rd1);
2386             break;
2387         case 0x5:
2388             gen_op_iwmmxt_subuw_M0_wRn(rd1);
2389             break;
2390         case 0x7:
2391             gen_op_iwmmxt_subsw_M0_wRn(rd1);
2392             break;
2393         case 0x8:
2394             gen_op_iwmmxt_subnl_M0_wRn(rd1);
2395             break;
2396         case 0x9:
2397             gen_op_iwmmxt_subul_M0_wRn(rd1);
2398             break;
2399         case 0xb:
2400             gen_op_iwmmxt_subsl_M0_wRn(rd1);
2401             break;
2402         default:
2403             return 1;
2404         }
2405         gen_op_iwmmxt_movq_wRn_M0(wrd);
2406         gen_op_iwmmxt_set_mup();
2407         gen_op_iwmmxt_set_cup();
2408         break;
2409     case 0x01e: case 0x11e: case 0x21e: case 0x31e:     /* WSHUFH */
2410     case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2411     case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2412     case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2413         wrd = (insn >> 12) & 0xf;
2414         rd0 = (insn >> 16) & 0xf;
2415         gen_op_iwmmxt_movq_M0_wRn(rd0);
2416         tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2417         gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2418         tcg_temp_free_i32(tmp);
2419         gen_op_iwmmxt_movq_wRn_M0(wrd);
2420         gen_op_iwmmxt_set_mup();
2421         gen_op_iwmmxt_set_cup();
2422         break;
2423     case 0x018: case 0x118: case 0x218: case 0x318:     /* WADD */
2424     case 0x418: case 0x518: case 0x618: case 0x718:
2425     case 0x818: case 0x918: case 0xa18: case 0xb18:
2426     case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2427         wrd = (insn >> 12) & 0xf;
2428         rd0 = (insn >> 16) & 0xf;
2429         rd1 = (insn >> 0) & 0xf;
2430         gen_op_iwmmxt_movq_M0_wRn(rd0);
2431         switch ((insn >> 20) & 0xf) {
2432         case 0x0:
2433             gen_op_iwmmxt_addnb_M0_wRn(rd1);
2434             break;
2435         case 0x1:
2436             gen_op_iwmmxt_addub_M0_wRn(rd1);
2437             break;
2438         case 0x3:
2439             gen_op_iwmmxt_addsb_M0_wRn(rd1);
2440             break;
2441         case 0x4:
2442             gen_op_iwmmxt_addnw_M0_wRn(rd1);
2443             break;
2444         case 0x5:
2445             gen_op_iwmmxt_adduw_M0_wRn(rd1);
2446             break;
2447         case 0x7:
2448             gen_op_iwmmxt_addsw_M0_wRn(rd1);
2449             break;
2450         case 0x8:
2451             gen_op_iwmmxt_addnl_M0_wRn(rd1);
2452             break;
2453         case 0x9:
2454             gen_op_iwmmxt_addul_M0_wRn(rd1);
2455             break;
2456         case 0xb:
2457             gen_op_iwmmxt_addsl_M0_wRn(rd1);
2458             break;
2459         default:
2460             return 1;
2461         }
2462         gen_op_iwmmxt_movq_wRn_M0(wrd);
2463         gen_op_iwmmxt_set_mup();
2464         gen_op_iwmmxt_set_cup();
2465         break;
2466     case 0x008: case 0x108: case 0x208: case 0x308:     /* WPACK */
2467     case 0x408: case 0x508: case 0x608: case 0x708:
2468     case 0x808: case 0x908: case 0xa08: case 0xb08:
2469     case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2470         if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2471             return 1;
2472         wrd = (insn >> 12) & 0xf;
2473         rd0 = (insn >> 16) & 0xf;
2474         rd1 = (insn >> 0) & 0xf;
2475         gen_op_iwmmxt_movq_M0_wRn(rd0);
2476         switch ((insn >> 22) & 3) {
2477         case 1:
2478             if (insn & (1 << 21))
2479                 gen_op_iwmmxt_packsw_M0_wRn(rd1);
2480             else
2481                 gen_op_iwmmxt_packuw_M0_wRn(rd1);
2482             break;
2483         case 2:
2484             if (insn & (1 << 21))
2485                 gen_op_iwmmxt_packsl_M0_wRn(rd1);
2486             else
2487                 gen_op_iwmmxt_packul_M0_wRn(rd1);
2488             break;
2489         case 3:
2490             if (insn & (1 << 21))
2491                 gen_op_iwmmxt_packsq_M0_wRn(rd1);
2492             else
2493                 gen_op_iwmmxt_packuq_M0_wRn(rd1);
2494             break;
2495         }
2496         gen_op_iwmmxt_movq_wRn_M0(wrd);
2497         gen_op_iwmmxt_set_mup();
2498         gen_op_iwmmxt_set_cup();
2499         break;
2500     case 0x201: case 0x203: case 0x205: case 0x207:
2501     case 0x209: case 0x20b: case 0x20d: case 0x20f:
2502     case 0x211: case 0x213: case 0x215: case 0x217:
2503     case 0x219: case 0x21b: case 0x21d: case 0x21f:
2504         wrd = (insn >> 5) & 0xf;
2505         rd0 = (insn >> 12) & 0xf;
2506         rd1 = (insn >> 0) & 0xf;
2507         if (rd0 == 0xf || rd1 == 0xf)
2508             return 1;
2509         gen_op_iwmmxt_movq_M0_wRn(wrd);
2510         tmp = load_reg(s, rd0);
2511         tmp2 = load_reg(s, rd1);
2512         switch ((insn >> 16) & 0xf) {
2513         case 0x0:                                       /* TMIA */
2514             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2515             break;
2516         case 0x8:                                       /* TMIAPH */
2517             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2518             break;
2519         case 0xc: case 0xd: case 0xe: case 0xf:                 /* TMIAxy */
2520             if (insn & (1 << 16))
2521                 tcg_gen_shri_i32(tmp, tmp, 16);
2522             if (insn & (1 << 17))
2523                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2524             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2525             break;
2526         default:
2527             tcg_temp_free_i32(tmp2);
2528             tcg_temp_free_i32(tmp);
2529             return 1;
2530         }
2531         tcg_temp_free_i32(tmp2);
2532         tcg_temp_free_i32(tmp);
2533         gen_op_iwmmxt_movq_wRn_M0(wrd);
2534         gen_op_iwmmxt_set_mup();
2535         break;
2536     default:
2537         return 1;
2538     }
2539
2540     return 0;
2541 }
2542
2543 /* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2544    (ie. an undefined instruction).  */
2545 static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2546 {
2547     int acc, rd0, rd1, rdhi, rdlo;
2548     TCGv_i32 tmp, tmp2;
2549
2550     if ((insn & 0x0ff00f10) == 0x0e200010) {
2551         /* Multiply with Internal Accumulate Format */
2552         rd0 = (insn >> 12) & 0xf;
2553         rd1 = insn & 0xf;
2554         acc = (insn >> 5) & 7;
2555
2556         if (acc != 0)
2557             return 1;
2558
2559         tmp = load_reg(s, rd0);
2560         tmp2 = load_reg(s, rd1);
2561         switch ((insn >> 16) & 0xf) {
2562         case 0x0:                                       /* MIA */
2563             gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2564             break;
2565         case 0x8:                                       /* MIAPH */
2566             gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2567             break;
2568         case 0xc:                                       /* MIABB */
2569         case 0xd:                                       /* MIABT */
2570         case 0xe:                                       /* MIATB */
2571         case 0xf:                                       /* MIATT */
2572             if (insn & (1 << 16))
2573                 tcg_gen_shri_i32(tmp, tmp, 16);
2574             if (insn & (1 << 17))
2575                 tcg_gen_shri_i32(tmp2, tmp2, 16);
2576             gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2577             break;
2578         default:
2579             return 1;
2580         }
2581         tcg_temp_free_i32(tmp2);
2582         tcg_temp_free_i32(tmp);
2583
2584         gen_op_iwmmxt_movq_wRn_M0(acc);
2585         return 0;
2586     }
2587
2588     if ((insn & 0x0fe00ff8) == 0x0c400000) {
2589         /* Internal Accumulator Access Format */
2590         rdhi = (insn >> 16) & 0xf;
2591         rdlo = (insn >> 12) & 0xf;
2592         acc = insn & 7;
2593
2594         if (acc != 0)
2595             return 1;
2596
2597         if (insn & ARM_CP_RW_BIT) {                     /* MRA */
2598             iwmmxt_load_reg(cpu_V0, acc);
2599             tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2600             tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2601             tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2602         } else {                                        /* MAR */
2603             tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2604             iwmmxt_store_reg(cpu_V0, acc);
2605         }
2606         return 0;
2607     }
2608
2609     return 1;
2610 }
2611
2612 #define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2613 #define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2614     if (dc_isar_feature(aa32_simd_r32, s)) { \
2615         reg = (((insn) >> (bigbit)) & 0x0f) \
2616               | (((insn) >> ((smallbit) - 4)) & 0x10); \
2617     } else { \
2618         if (insn & (1 << (smallbit))) \
2619             return 1; \
2620         reg = ((insn) >> (bigbit)) & 0x0f; \
2621     }} while (0)
2622
2623 #define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2624 #define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
2625 #define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
2626
2627 static void gen_neon_dup_low16(TCGv_i32 var)
2628 {
2629     TCGv_i32 tmp = tcg_temp_new_i32();
2630     tcg_gen_ext16u_i32(var, var);
2631     tcg_gen_shli_i32(tmp, var, 16);
2632     tcg_gen_or_i32(var, var, tmp);
2633     tcg_temp_free_i32(tmp);
2634 }
2635
2636 static void gen_neon_dup_high16(TCGv_i32 var)
2637 {
2638     TCGv_i32 tmp = tcg_temp_new_i32();
2639     tcg_gen_andi_i32(var, var, 0xffff0000);
2640     tcg_gen_shri_i32(tmp, var, 16);
2641     tcg_gen_or_i32(var, var, tmp);
2642     tcg_temp_free_i32(tmp);
2643 }
2644
2645 static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2646 {
2647 #ifndef CONFIG_USER_ONLY
2648     return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
2649            ((s->base.pc_next - 1) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2650 #else
2651     return true;
2652 #endif
2653 }
2654
2655 static void gen_goto_ptr(void)
2656 {
2657     tcg_gen_lookup_and_goto_ptr();
2658 }
2659
2660 /* This will end the TB but doesn't guarantee we'll return to
2661  * cpu_loop_exec. Any live exit_requests will be processed as we
2662  * enter the next TB.
2663  */
2664 static void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
2665 {
2666     if (use_goto_tb(s, dest)) {
2667         tcg_gen_goto_tb(n);
2668         gen_set_pc_im(s, dest);
2669         tcg_gen_exit_tb(s->base.tb, n);
2670     } else {
2671         gen_set_pc_im(s, dest);
2672         gen_goto_ptr();
2673     }
2674     s->base.is_jmp = DISAS_NORETURN;
2675 }
2676
2677 static inline void gen_jmp (DisasContext *s, uint32_t dest)
2678 {
2679     if (unlikely(is_singlestepping(s))) {
2680         /* An indirect jump so that we still trigger the debug exception.  */
2681         gen_set_pc_im(s, dest);
2682         s->base.is_jmp = DISAS_JUMP;
2683     } else {
2684         gen_goto_tb(s, 0, dest);
2685     }
2686 }
2687
2688 static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2689 {
2690     if (x)
2691         tcg_gen_sari_i32(t0, t0, 16);
2692     else
2693         gen_sxth(t0);
2694     if (y)
2695         tcg_gen_sari_i32(t1, t1, 16);
2696     else
2697         gen_sxth(t1);
2698     tcg_gen_mul_i32(t0, t0, t1);
2699 }
2700
2701 /* Return the mask of PSR bits set by a MSR instruction.  */
2702 static uint32_t msr_mask(DisasContext *s, int flags, int spsr)
2703 {
2704     uint32_t mask = 0;
2705
2706     if (flags & (1 << 0)) {
2707         mask |= 0xff;
2708     }
2709     if (flags & (1 << 1)) {
2710         mask |= 0xff00;
2711     }
2712     if (flags & (1 << 2)) {
2713         mask |= 0xff0000;
2714     }
2715     if (flags & (1 << 3)) {
2716         mask |= 0xff000000;
2717     }
2718
2719     /* Mask out undefined and reserved bits.  */
2720     mask &= aarch32_cpsr_valid_mask(s->features, s->isar);
2721
2722     /* Mask out execution state.  */
2723     if (!spsr) {
2724         mask &= ~CPSR_EXEC;
2725     }
2726
2727     /* Mask out privileged bits.  */
2728     if (IS_USER(s)) {
2729         mask &= CPSR_USER;
2730     }
2731     return mask;
2732 }
2733
2734 /* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
2735 static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv_i32 t0)
2736 {
2737     TCGv_i32 tmp;
2738     if (spsr) {
2739         /* ??? This is also undefined in system mode.  */
2740         if (IS_USER(s))
2741             return 1;
2742
2743         tmp = load_cpu_field(spsr);
2744         tcg_gen_andi_i32(tmp, tmp, ~mask);
2745         tcg_gen_andi_i32(t0, t0, mask);
2746         tcg_gen_or_i32(tmp, tmp, t0);
2747         store_cpu_field(tmp, spsr);
2748     } else {
2749         gen_set_cpsr(t0, mask);
2750     }
2751     tcg_temp_free_i32(t0);
2752     gen_lookup_tb(s);
2753     return 0;
2754 }
2755
2756 /* Returns nonzero if access to the PSR is not permitted.  */
2757 static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
2758 {
2759     TCGv_i32 tmp;
2760     tmp = tcg_temp_new_i32();
2761     tcg_gen_movi_i32(tmp, val);
2762     return gen_set_psr(s, mask, spsr, tmp);
2763 }
2764
2765 static bool msr_banked_access_decode(DisasContext *s, int r, int sysm, int rn,
2766                                      int *tgtmode, int *regno)
2767 {
2768     /* Decode the r and sysm fields of MSR/MRS banked accesses into
2769      * the target mode and register number, and identify the various
2770      * unpredictable cases.
2771      * MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2772      *  + executed in user mode
2773      *  + using R15 as the src/dest register
2774      *  + accessing an unimplemented register
2775      *  + accessing a register that's inaccessible at current PL/security state*
2776      *  + accessing a register that you could access with a different insn
2777      * We choose to UNDEF in all these cases.
2778      * Since we don't know which of the various AArch32 modes we are in
2779      * we have to defer some checks to runtime.
2780      * Accesses to Monitor mode registers from Secure EL1 (which implies
2781      * that EL3 is AArch64) must trap to EL3.
2782      *
2783      * If the access checks fail this function will emit code to take
2784      * an exception and return false. Otherwise it will return true,
2785      * and set *tgtmode and *regno appropriately.
2786      */
2787     int exc_target = default_exception_el(s);
2788
2789     /* These instructions are present only in ARMv8, or in ARMv7 with the
2790      * Virtualization Extensions.
2791      */
2792     if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2793         !arm_dc_feature(s, ARM_FEATURE_EL2)) {
2794         goto undef;
2795     }
2796
2797     if (IS_USER(s) || rn == 15) {
2798         goto undef;
2799     }
2800
2801     /* The table in the v8 ARM ARM section F5.2.3 describes the encoding
2802      * of registers into (r, sysm).
2803      */
2804     if (r) {
2805         /* SPSRs for other modes */
2806         switch (sysm) {
2807         case 0xe: /* SPSR_fiq */
2808             *tgtmode = ARM_CPU_MODE_FIQ;
2809             break;
2810         case 0x10: /* SPSR_irq */
2811             *tgtmode = ARM_CPU_MODE_IRQ;
2812             break;
2813         case 0x12: /* SPSR_svc */
2814             *tgtmode = ARM_CPU_MODE_SVC;
2815             break;
2816         case 0x14: /* SPSR_abt */
2817             *tgtmode = ARM_CPU_MODE_ABT;
2818             break;
2819         case 0x16: /* SPSR_und */
2820             *tgtmode = ARM_CPU_MODE_UND;
2821             break;
2822         case 0x1c: /* SPSR_mon */
2823             *tgtmode = ARM_CPU_MODE_MON;
2824             break;
2825         case 0x1e: /* SPSR_hyp */
2826             *tgtmode = ARM_CPU_MODE_HYP;
2827             break;
2828         default: /* unallocated */
2829             goto undef;
2830         }
2831         /* We arbitrarily assign SPSR a register number of 16. */
2832         *regno = 16;
2833     } else {
2834         /* general purpose registers for other modes */
2835         switch (sysm) {
2836         case 0x0 ... 0x6:   /* 0b00xxx : r8_usr ... r14_usr */
2837             *tgtmode = ARM_CPU_MODE_USR;
2838             *regno = sysm + 8;
2839             break;
2840         case 0x8 ... 0xe:   /* 0b01xxx : r8_fiq ... r14_fiq */
2841             *tgtmode = ARM_CPU_MODE_FIQ;
2842             *regno = sysm;
2843             break;
2844         case 0x10 ... 0x11: /* 0b1000x : r14_irq, r13_irq */
2845             *tgtmode = ARM_CPU_MODE_IRQ;
2846             *regno = sysm & 1 ? 13 : 14;
2847             break;
2848         case 0x12 ... 0x13: /* 0b1001x : r14_svc, r13_svc */
2849             *tgtmode = ARM_CPU_MODE_SVC;
2850             *regno = sysm & 1 ? 13 : 14;
2851             break;
2852         case 0x14 ... 0x15: /* 0b1010x : r14_abt, r13_abt */
2853             *tgtmode = ARM_CPU_MODE_ABT;
2854             *regno = sysm & 1 ? 13 : 14;
2855             break;
2856         case 0x16 ... 0x17: /* 0b1011x : r14_und, r13_und */
2857             *tgtmode = ARM_CPU_MODE_UND;
2858             *regno = sysm & 1 ? 13 : 14;
2859             break;
2860         case 0x1c ... 0x1d: /* 0b1110x : r14_mon, r13_mon */
2861             *tgtmode = ARM_CPU_MODE_MON;
2862             *regno = sysm & 1 ? 13 : 14;
2863             break;
2864         case 0x1e ... 0x1f: /* 0b1111x : elr_hyp, r13_hyp */
2865             *tgtmode = ARM_CPU_MODE_HYP;
2866             /* Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) */
2867             *regno = sysm & 1 ? 13 : 17;
2868             break;
2869         default: /* unallocated */
2870             goto undef;
2871         }
2872     }
2873
2874     /* Catch the 'accessing inaccessible register' cases we can detect
2875      * at translate time.
2876      */
2877     switch (*tgtmode) {
2878     case ARM_CPU_MODE_MON:
2879         if (!arm_dc_feature(s, ARM_FEATURE_EL3) || s->ns) {
2880             goto undef;
2881         }
2882         if (s->current_el == 1) {
2883             /* If we're in Secure EL1 (which implies that EL3 is AArch64)
2884              * then accesses to Mon registers trap to EL3
2885              */
2886             exc_target = 3;
2887             goto undef;
2888         }
2889         break;
2890     case ARM_CPU_MODE_HYP:
2891         /*
2892          * SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2893          * (and so we can forbid accesses from EL2 or below). elr_hyp
2894          * can be accessed also from Hyp mode, so forbid accesses from
2895          * EL0 or EL1.
2896          */
2897         if (!arm_dc_feature(s, ARM_FEATURE_EL2) || s->current_el < 2 ||
2898             (s->current_el < 3 && *regno != 17)) {
2899             goto undef;
2900         }
2901         break;
2902     default:
2903         break;
2904     }
2905
2906     return true;
2907
2908 undef:
2909     /* If we get here then some access check did not pass */
2910     gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2911                        syn_uncategorized(), exc_target);
2912     return false;
2913 }
2914
2915 static void gen_msr_banked(DisasContext *s, int r, int sysm, int rn)
2916 {
2917     TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2918     int tgtmode = 0, regno = 0;
2919
2920     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2921         return;
2922     }
2923
2924     /* Sync state because msr_banked() can raise exceptions */
2925     gen_set_condexec(s);
2926     gen_set_pc_im(s, s->pc_curr);
2927     tcg_reg = load_reg(s, rn);
2928     tcg_tgtmode = tcg_const_i32(tgtmode);
2929     tcg_regno = tcg_const_i32(regno);
2930     gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2931     tcg_temp_free_i32(tcg_tgtmode);
2932     tcg_temp_free_i32(tcg_regno);
2933     tcg_temp_free_i32(tcg_reg);
2934     s->base.is_jmp = DISAS_UPDATE;
2935 }
2936
2937 static void gen_mrs_banked(DisasContext *s, int r, int sysm, int rn)
2938 {
2939     TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2940     int tgtmode = 0, regno = 0;
2941
2942     if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2943         return;
2944     }
2945
2946     /* Sync state because mrs_banked() can raise exceptions */
2947     gen_set_condexec(s);
2948     gen_set_pc_im(s, s->pc_curr);
2949     tcg_reg = tcg_temp_new_i32();
2950     tcg_tgtmode = tcg_const_i32(tgtmode);
2951     tcg_regno = tcg_const_i32(regno);
2952     gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2953     tcg_temp_free_i32(tcg_tgtmode);
2954     tcg_temp_free_i32(tcg_regno);
2955     store_reg(s, rn, tcg_reg);
2956     s->base.is_jmp = DISAS_UPDATE;
2957 }
2958
2959 /* Store value to PC as for an exception return (ie don't
2960  * mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2961  * will do the masking based on the new value of the Thumb bit.
2962  */
2963 static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
2964 {
2965     tcg_gen_mov_i32(cpu_R[15], pc);
2966     tcg_temp_free_i32(pc);
2967 }
2968
2969 /* Generate a v6 exception return.  Marks both values as dead.  */
2970 static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
2971 {
2972     store_pc_exc_ret(s, pc);
2973     /* The cpsr_write_eret helper will mask the low bits of PC
2974      * appropriately depending on the new Thumb bit, so it must
2975      * be called after storing the new PC.
2976      */
2977     if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
2978         gen_io_start();
2979     }
2980     gen_helper_cpsr_write_eret(cpu_env, cpsr);
2981     tcg_temp_free_i32(cpsr);
2982     /* Must exit loop to check un-masked IRQs */
2983     s->base.is_jmp = DISAS_EXIT;
2984 }
2985
2986 /* Generate an old-style exception return. Marks pc as dead. */
2987 static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
2988 {
2989     gen_rfe(s, pc, load_cpu_field(spsr));
2990 }
2991
2992 #define CPU_V001 cpu_V0, cpu_V0, cpu_V1
2993
2994 static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
2995 {
2996     switch (size) {
2997     case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
2998     case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
2999     case 2: tcg_gen_add_i32(t0, t0, t1); break;
3000     default: abort();
3001     }
3002 }
3003
3004 static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3005 {
3006     switch (size) {
3007     case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3008     case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3009     case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3010     default: return;
3011     }
3012 }
3013
3014 static TCGv_i32 neon_load_scratch(int scratch)
3015 {
3016     TCGv_i32 tmp = tcg_temp_new_i32();
3017     tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3018     return tmp;
3019 }
3020
3021 static void neon_store_scratch(int scratch, TCGv_i32 var)
3022 {
3023     tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3024     tcg_temp_free_i32(var);
3025 }
3026
3027 static inline TCGv_i32 neon_get_scalar(int size, int reg)
3028 {
3029     TCGv_i32 tmp;
3030     if (size == 1) {
3031         tmp = neon_load_reg(reg & 7, reg >> 4);
3032         if (reg & 8) {
3033             gen_neon_dup_high16(tmp);
3034         } else {
3035             gen_neon_dup_low16(tmp);
3036         }
3037     } else {
3038         tmp = neon_load_reg(reg & 15, reg >> 4);
3039     }
3040     return tmp;
3041 }
3042
3043 static int gen_neon_unzip(int rd, int rm, int size, int q)
3044 {
3045     TCGv_ptr pd, pm;
3046
3047     if (!q && size == 2) {
3048         return 1;
3049     }
3050     pd = vfp_reg_ptr(true, rd);
3051     pm = vfp_reg_ptr(true, rm);
3052     if (q) {
3053         switch (size) {
3054         case 0:
3055             gen_helper_neon_qunzip8(pd, pm);
3056             break;
3057         case 1:
3058             gen_helper_neon_qunzip16(pd, pm);
3059             break;
3060         case 2:
3061             gen_helper_neon_qunzip32(pd, pm);
3062             break;
3063         default:
3064             abort();
3065         }
3066     } else {
3067         switch (size) {
3068         case 0:
3069             gen_helper_neon_unzip8(pd, pm);
3070             break;
3071         case 1:
3072             gen_helper_neon_unzip16(pd, pm);
3073             break;
3074         default:
3075             abort();
3076         }
3077     }
3078     tcg_temp_free_ptr(pd);
3079     tcg_temp_free_ptr(pm);
3080     return 0;
3081 }
3082
3083 static int gen_neon_zip(int rd, int rm, int size, int q)
3084 {
3085     TCGv_ptr pd, pm;
3086
3087     if (!q && size == 2) {
3088         return 1;
3089     }
3090     pd = vfp_reg_ptr(true, rd);
3091     pm = vfp_reg_ptr(true, rm);
3092     if (q) {
3093         switch (size) {
3094         case 0:
3095             gen_helper_neon_qzip8(pd, pm);
3096             break;
3097         case 1:
3098             gen_helper_neon_qzip16(pd, pm);
3099             break;
3100         case 2:
3101             gen_helper_neon_qzip32(pd, pm);
3102             break;
3103         default:
3104             abort();
3105         }
3106     } else {
3107         switch (size) {
3108         case 0:
3109             gen_helper_neon_zip8(pd, pm);
3110             break;
3111         case 1:
3112             gen_helper_neon_zip16(pd, pm);
3113             break;
3114         default:
3115             abort();
3116         }
3117     }
3118     tcg_temp_free_ptr(pd);
3119     tcg_temp_free_ptr(pm);
3120     return 0;
3121 }
3122
3123 static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3124 {
3125     TCGv_i32 rd, tmp;
3126
3127     rd = tcg_temp_new_i32();
3128     tmp = tcg_temp_new_i32();
3129
3130     tcg_gen_shli_i32(rd, t0, 8);
3131     tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3132     tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3133     tcg_gen_or_i32(rd, rd, tmp);
3134
3135     tcg_gen_shri_i32(t1, t1, 8);
3136     tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3137     tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3138     tcg_gen_or_i32(t1, t1, tmp);
3139     tcg_gen_mov_i32(t0, rd);
3140
3141     tcg_temp_free_i32(tmp);
3142     tcg_temp_free_i32(rd);
3143 }
3144
3145 static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3146 {
3147     TCGv_i32 rd, tmp;
3148
3149     rd = tcg_temp_new_i32();
3150     tmp = tcg_temp_new_i32();
3151
3152     tcg_gen_shli_i32(rd, t0, 16);
3153     tcg_gen_andi_i32(tmp, t1, 0xffff);
3154     tcg_gen_or_i32(rd, rd, tmp);
3155     tcg_gen_shri_i32(t1, t1, 16);
3156     tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3157     tcg_gen_or_i32(t1, t1, tmp);
3158     tcg_gen_mov_i32(t0, rd);
3159
3160     tcg_temp_free_i32(tmp);
3161     tcg_temp_free_i32(rd);
3162 }
3163
3164 static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3165 {
3166     switch (size) {
3167     case 0: gen_helper_neon_narrow_u8(dest, src); break;
3168     case 1: gen_helper_neon_narrow_u16(dest, src); break;
3169     case 2: tcg_gen_extrl_i64_i32(dest, src); break;
3170     default: abort();
3171     }
3172 }
3173
3174 static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3175 {
3176     switch (size) {
3177     case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3178     case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3179     case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3180     default: abort();
3181     }
3182 }
3183
3184 static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3185 {
3186     switch (size) {
3187     case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3188     case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3189     case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3190     default: abort();
3191     }
3192 }
3193
3194 static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3195 {
3196     switch (size) {
3197     case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3198     case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3199     case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3200     default: abort();
3201     }
3202 }
3203
3204 static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3205 {
3206     if (u) {
3207         switch (size) {
3208         case 0: gen_helper_neon_widen_u8(dest, src); break;
3209         case 1: gen_helper_neon_widen_u16(dest, src); break;
3210         case 2: tcg_gen_extu_i32_i64(dest, src); break;
3211         default: abort();
3212         }
3213     } else {
3214         switch (size) {
3215         case 0: gen_helper_neon_widen_s8(dest, src); break;
3216         case 1: gen_helper_neon_widen_s16(dest, src); break;
3217         case 2: tcg_gen_ext_i32_i64(dest, src); break;
3218         default: abort();
3219         }
3220     }
3221     tcg_temp_free_i32(src);
3222 }
3223
3224 static inline void gen_neon_addl(int size)
3225 {
3226     switch (size) {
3227     case 0: gen_helper_neon_addl_u16(CPU_V001); break;
3228     case 1: gen_helper_neon_addl_u32(CPU_V001); break;
3229     case 2: tcg_gen_add_i64(CPU_V001); break;
3230     default: abort();
3231     }
3232 }
3233
3234 static inline void gen_neon_negl(TCGv_i64 var, int size)
3235 {
3236     switch (size) {
3237     case 0: gen_helper_neon_negl_u16(var, var); break;
3238     case 1: gen_helper_neon_negl_u32(var, var); break;
3239     case 2:
3240         tcg_gen_neg_i64(var, var);
3241         break;
3242     default: abort();
3243     }
3244 }
3245
3246 static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3247 {
3248     switch (size) {
3249     case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3250     case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3251     default: abort();
3252     }
3253 }
3254
3255 static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3256                                  int size, int u)
3257 {
3258     TCGv_i64 tmp;
3259
3260     switch ((size << 1) | u) {
3261     case 0: gen_helper_neon_mull_s8(dest, a, b); break;
3262     case 1: gen_helper_neon_mull_u8(dest, a, b); break;
3263     case 2: gen_helper_neon_mull_s16(dest, a, b); break;
3264     case 3: gen_helper_neon_mull_u16(dest, a, b); break;
3265     case 4:
3266         tmp = gen_muls_i64_i32(a, b);
3267         tcg_gen_mov_i64(dest, tmp);
3268         tcg_temp_free_i64(tmp);
3269         break;
3270     case 5:
3271         tmp = gen_mulu_i64_i32(a, b);
3272         tcg_gen_mov_i64(dest, tmp);
3273         tcg_temp_free_i64(tmp);
3274         break;
3275     default: abort();
3276     }
3277
3278     /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
3279        Don't forget to clean them now.  */
3280     if (size < 2) {
3281         tcg_temp_free_i32(a);
3282         tcg_temp_free_i32(b);
3283     }
3284 }
3285
3286 static void gen_neon_narrow_op(int op, int u, int size,
3287                                TCGv_i32 dest, TCGv_i64 src)
3288 {
3289     if (op) {
3290         if (u) {
3291             gen_neon_unarrow_sats(size, dest, src);
3292         } else {
3293             gen_neon_narrow(size, dest, src);
3294         }
3295     } else {
3296         if (u) {
3297             gen_neon_narrow_satu(size, dest, src);
3298         } else {
3299             gen_neon_narrow_sats(size, dest, src);
3300         }
3301     }
3302 }
3303
3304 /* Symbolic constants for op fields for Neon 2-register miscellaneous.
3305  * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
3306  * table A7-13.
3307  */
3308 #define NEON_2RM_VREV64 0
3309 #define NEON_2RM_VREV32 1
3310 #define NEON_2RM_VREV16 2
3311 #define NEON_2RM_VPADDL 4
3312 #define NEON_2RM_VPADDL_U 5
3313 #define NEON_2RM_AESE 6 /* Includes AESD */
3314 #define NEON_2RM_AESMC 7 /* Includes AESIMC */
3315 #define NEON_2RM_VCLS 8
3316 #define NEON_2RM_VCLZ 9
3317 #define NEON_2RM_VCNT 10
3318 #define NEON_2RM_VMVN 11
3319 #define NEON_2RM_VPADAL 12
3320 #define NEON_2RM_VPADAL_U 13
3321 #define NEON_2RM_VQABS 14
3322 #define NEON_2RM_VQNEG 15
3323 #define NEON_2RM_VCGT0 16
3324 #define NEON_2RM_VCGE0 17
3325 #define NEON_2RM_VCEQ0 18
3326 #define NEON_2RM_VCLE0 19
3327 #define NEON_2RM_VCLT0 20
3328 #define NEON_2RM_SHA1H 21
3329 #define NEON_2RM_VABS 22
3330 #define NEON_2RM_VNEG 23
3331 #define NEON_2RM_VCGT0_F 24
3332 #define NEON_2RM_VCGE0_F 25
3333 #define NEON_2RM_VCEQ0_F 26
3334 #define NEON_2RM_VCLE0_F 27
3335 #define NEON_2RM_VCLT0_F 28
3336 #define NEON_2RM_VABS_F 30
3337 #define NEON_2RM_VNEG_F 31
3338 #define NEON_2RM_VSWP 32
3339 #define NEON_2RM_VTRN 33
3340 #define NEON_2RM_VUZP 34
3341 #define NEON_2RM_VZIP 35
3342 #define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
3343 #define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
3344 #define NEON_2RM_VSHLL 38
3345 #define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
3346 #define NEON_2RM_VRINTN 40
3347 #define NEON_2RM_VRINTX 41
3348 #define NEON_2RM_VRINTA 42
3349 #define NEON_2RM_VRINTZ 43
3350 #define NEON_2RM_VCVT_F16_F32 44
3351 #define NEON_2RM_VRINTM 45
3352 #define NEON_2RM_VCVT_F32_F16 46
3353 #define NEON_2RM_VRINTP 47
3354 #define NEON_2RM_VCVTAU 48
3355 #define NEON_2RM_VCVTAS 49
3356 #define NEON_2RM_VCVTNU 50
3357 #define NEON_2RM_VCVTNS 51
3358 #define NEON_2RM_VCVTPU 52
3359 #define NEON_2RM_VCVTPS 53
3360 #define NEON_2RM_VCVTMU 54
3361 #define NEON_2RM_VCVTMS 55
3362 #define NEON_2RM_VRECPE 56
3363 #define NEON_2RM_VRSQRTE 57
3364 #define NEON_2RM_VRECPE_F 58
3365 #define NEON_2RM_VRSQRTE_F 59
3366 #define NEON_2RM_VCVT_FS 60
3367 #define NEON_2RM_VCVT_FU 61
3368 #define NEON_2RM_VCVT_SF 62
3369 #define NEON_2RM_VCVT_UF 63
3370
3371 static bool neon_2rm_is_v8_op(int op)
3372 {
3373     /* Return true if this neon 2reg-misc op is ARMv8 and up */
3374     switch (op) {
3375     case NEON_2RM_VRINTN:
3376     case NEON_2RM_VRINTA:
3377     case NEON_2RM_VRINTM:
3378     case NEON_2RM_VRINTP:
3379     case NEON_2RM_VRINTZ:
3380     case NEON_2RM_VRINTX:
3381     case NEON_2RM_VCVTAU:
3382     case NEON_2RM_VCVTAS:
3383     case NEON_2RM_VCVTNU:
3384     case NEON_2RM_VCVTNS:
3385     case NEON_2RM_VCVTPU:
3386     case NEON_2RM_VCVTPS:
3387     case NEON_2RM_VCVTMU:
3388     case NEON_2RM_VCVTMS:
3389         return true;
3390     default:
3391         return false;
3392     }
3393 }
3394
3395 /* Each entry in this array has bit n set if the insn allows
3396  * size value n (otherwise it will UNDEF). Since unallocated
3397  * op values will have no bits set they always UNDEF.
3398  */
3399 static const uint8_t neon_2rm_sizes[] = {
3400     [NEON_2RM_VREV64] = 0x7,
3401     [NEON_2RM_VREV32] = 0x3,
3402     [NEON_2RM_VREV16] = 0x1,
3403     [NEON_2RM_VPADDL] = 0x7,
3404     [NEON_2RM_VPADDL_U] = 0x7,
3405     [NEON_2RM_AESE] = 0x1,
3406     [NEON_2RM_AESMC] = 0x1,
3407     [NEON_2RM_VCLS] = 0x7,
3408     [NEON_2RM_VCLZ] = 0x7,
3409     [NEON_2RM_VCNT] = 0x1,
3410     [NEON_2RM_VMVN] = 0x1,
3411     [NEON_2RM_VPADAL] = 0x7,
3412     [NEON_2RM_VPADAL_U] = 0x7,
3413     [NEON_2RM_VQABS] = 0x7,
3414     [NEON_2RM_VQNEG] = 0x7,
3415     [NEON_2RM_VCGT0] = 0x7,
3416     [NEON_2RM_VCGE0] = 0x7,
3417     [NEON_2RM_VCEQ0] = 0x7,
3418     [NEON_2RM_VCLE0] = 0x7,
3419     [NEON_2RM_VCLT0] = 0x7,
3420     [NEON_2RM_SHA1H] = 0x4,
3421     [NEON_2RM_VABS] = 0x7,
3422     [NEON_2RM_VNEG] = 0x7,
3423     [NEON_2RM_VCGT0_F] = 0x4,
3424     [NEON_2RM_VCGE0_F] = 0x4,
3425     [NEON_2RM_VCEQ0_F] = 0x4,
3426     [NEON_2RM_VCLE0_F] = 0x4,
3427     [NEON_2RM_VCLT0_F] = 0x4,
3428     [NEON_2RM_VABS_F] = 0x4,
3429     [NEON_2RM_VNEG_F] = 0x4,
3430     [NEON_2RM_VSWP] = 0x1,
3431     [NEON_2RM_VTRN] = 0x7,
3432     [NEON_2RM_VUZP] = 0x7,
3433     [NEON_2RM_VZIP] = 0x7,
3434     [NEON_2RM_VMOVN] = 0x7,
3435     [NEON_2RM_VQMOVN] = 0x7,
3436     [NEON_2RM_VSHLL] = 0x7,
3437     [NEON_2RM_SHA1SU1] = 0x4,
3438     [NEON_2RM_VRINTN] = 0x4,
3439     [NEON_2RM_VRINTX] = 0x4,
3440     [NEON_2RM_VRINTA] = 0x4,
3441     [NEON_2RM_VRINTZ] = 0x4,
3442     [NEON_2RM_VCVT_F16_F32] = 0x2,
3443     [NEON_2RM_VRINTM] = 0x4,
3444     [NEON_2RM_VCVT_F32_F16] = 0x2,
3445     [NEON_2RM_VRINTP] = 0x4,
3446     [NEON_2RM_VCVTAU] = 0x4,
3447     [NEON_2RM_VCVTAS] = 0x4,
3448     [NEON_2RM_VCVTNU] = 0x4,
3449     [NEON_2RM_VCVTNS] = 0x4,
3450     [NEON_2RM_VCVTPU] = 0x4,
3451     [NEON_2RM_VCVTPS] = 0x4,
3452     [NEON_2RM_VCVTMU] = 0x4,
3453     [NEON_2RM_VCVTMS] = 0x4,
3454     [NEON_2RM_VRECPE] = 0x4,
3455     [NEON_2RM_VRSQRTE] = 0x4,
3456     [NEON_2RM_VRECPE_F] = 0x4,
3457     [NEON_2RM_VRSQRTE_F] = 0x4,
3458     [NEON_2RM_VCVT_FS] = 0x4,
3459     [NEON_2RM_VCVT_FU] = 0x4,
3460     [NEON_2RM_VCVT_SF] = 0x4,
3461     [NEON_2RM_VCVT_UF] = 0x4,
3462 };
3463
3464 static void gen_gvec_fn3_qc(uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs,
3465                             uint32_t opr_sz, uint32_t max_sz,
3466                             gen_helper_gvec_3_ptr *fn)
3467 {
3468     TCGv_ptr qc_ptr = tcg_temp_new_ptr();
3469
3470     tcg_gen_addi_ptr(qc_ptr, cpu_env, offsetof(CPUARMState, vfp.qc));
3471     tcg_gen_gvec_3_ptr(rd_ofs, rn_ofs, rm_ofs, qc_ptr,
3472                        opr_sz, max_sz, 0, fn);
3473     tcg_temp_free_ptr(qc_ptr);
3474 }
3475
3476 void gen_gvec_sqrdmlah_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3477                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3478 {
3479     static gen_helper_gvec_3_ptr * const fns[2] = {
3480         gen_helper_gvec_qrdmlah_s16, gen_helper_gvec_qrdmlah_s32
3481     };
3482     tcg_debug_assert(vece >= 1 && vece <= 2);
3483     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3484 }
3485
3486 void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
3487                           uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
3488 {
3489     static gen_helper_gvec_3_ptr * const fns[2] = {
3490         gen_helper_gvec_qrdmlsh_s16, gen_helper_gvec_qrdmlsh_s32
3491     };
3492     tcg_debug_assert(vece >= 1 && vece <= 2);
3493     gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
3494 }
3495
3496 #define GEN_CMP0(NAME, COND)                                            \
3497     static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a)               \
3498     {                                                                   \
3499         tcg_gen_setcondi_i32(COND, d, a, 0);                            \
3500         tcg_gen_neg_i32(d, d);                                          \
3501     }                                                                   \
3502     static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a)               \
3503     {                                                                   \
3504         tcg_gen_setcondi_i64(COND, d, a, 0);                            \
3505         tcg_gen_neg_i64(d, d);                                          \
3506     }                                                                   \
3507     static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
3508     {                                                                   \
3509         TCGv_vec zero = tcg_const_zeros_vec_matching(d);                \
3510         tcg_gen_cmp_vec(COND, vece, d, a, zero);                        \
3511         tcg_temp_free_vec(zero);                                        \
3512     }                                                                   \
3513     void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m,      \
3514                             uint32_t opr_sz, uint32_t max_sz)           \
3515     {                                                                   \
3516         const GVecGen2 op[4] = {                                        \
3517             { .fno = gen_helper_gvec_##NAME##0_b,                       \
3518               .fniv = gen_##NAME##0_vec,                                \
3519               .opt_opc = vecop_list_cmp,                                \
3520               .vece = MO_8 },                                           \
3521             { .fno = gen_helper_gvec_##NAME##0_h,                       \
3522               .fniv = gen_##NAME##0_vec,                                \
3523               .opt_opc = vecop_list_cmp,                                \
3524               .vece = MO_16 },                                          \
3525             { .fni4 = gen_##NAME##0_i32,                                \
3526               .fniv = gen_##NAME##0_vec,                                \
3527               .opt_opc = vecop_list_cmp,                                \
3528               .vece = MO_32 },                                          \
3529             { .fni8 = gen_##NAME##0_i64,                                \
3530               .fniv = gen_##NAME##0_vec,                                \
3531               .opt_opc = vecop_list_cmp,                                \
3532               .prefer_i64 = TCG_TARGET_REG_BITS == 64,                  \
3533               .vece = MO_64 },                                          \
3534         };                                                              \
3535         tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]);                \
3536     }
3537
3538 static const TCGOpcode vecop_list_cmp[] = {
3539     INDEX_op_cmp_vec, 0
3540 };
3541
3542 GEN_CMP0(ceq, TCG_COND_EQ)
3543 GEN_CMP0(cle, TCG_COND_LE)
3544 GEN_CMP0(cge, TCG_COND_GE)
3545 GEN_CMP0(clt, TCG_COND_LT)
3546 GEN_CMP0(cgt, TCG_COND_GT)
3547
3548 #undef GEN_CMP0
3549
3550 static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3551 {
3552     tcg_gen_vec_sar8i_i64(a, a, shift);
3553     tcg_gen_vec_add8_i64(d, d, a);
3554 }
3555
3556 static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3557 {
3558     tcg_gen_vec_sar16i_i64(a, a, shift);
3559     tcg_gen_vec_add16_i64(d, d, a);
3560 }
3561
3562 static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3563 {
3564     tcg_gen_sari_i32(a, a, shift);
3565     tcg_gen_add_i32(d, d, a);
3566 }
3567
3568 static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3569 {
3570     tcg_gen_sari_i64(a, a, shift);
3571     tcg_gen_add_i64(d, d, a);
3572 }
3573
3574 static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3575 {
3576     tcg_gen_sari_vec(vece, a, a, sh);
3577     tcg_gen_add_vec(vece, d, d, a);
3578 }
3579
3580 void gen_gvec_ssra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3581                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3582 {
3583     static const TCGOpcode vecop_list[] = {
3584         INDEX_op_sari_vec, INDEX_op_add_vec, 0
3585     };
3586     static const GVecGen2i ops[4] = {
3587         { .fni8 = gen_ssra8_i64,
3588           .fniv = gen_ssra_vec,
3589           .fno = gen_helper_gvec_ssra_b,
3590           .load_dest = true,
3591           .opt_opc = vecop_list,
3592           .vece = MO_8 },
3593         { .fni8 = gen_ssra16_i64,
3594           .fniv = gen_ssra_vec,
3595           .fno = gen_helper_gvec_ssra_h,
3596           .load_dest = true,
3597           .opt_opc = vecop_list,
3598           .vece = MO_16 },
3599         { .fni4 = gen_ssra32_i32,
3600           .fniv = gen_ssra_vec,
3601           .fno = gen_helper_gvec_ssra_s,
3602           .load_dest = true,
3603           .opt_opc = vecop_list,
3604           .vece = MO_32 },
3605         { .fni8 = gen_ssra64_i64,
3606           .fniv = gen_ssra_vec,
3607           .fno = gen_helper_gvec_ssra_b,
3608           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3609           .opt_opc = vecop_list,
3610           .load_dest = true,
3611           .vece = MO_64 },
3612     };
3613
3614     /* tszimm encoding produces immediates in the range [1..esize]. */
3615     tcg_debug_assert(shift > 0);
3616     tcg_debug_assert(shift <= (8 << vece));
3617
3618     /*
3619      * Shifts larger than the element size are architecturally valid.
3620      * Signed results in all sign bits.
3621      */
3622     shift = MIN(shift, (8 << vece) - 1);
3623     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3624 }
3625
3626 static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3627 {
3628     tcg_gen_vec_shr8i_i64(a, a, shift);
3629     tcg_gen_vec_add8_i64(d, d, a);
3630 }
3631
3632 static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3633 {
3634     tcg_gen_vec_shr16i_i64(a, a, shift);
3635     tcg_gen_vec_add16_i64(d, d, a);
3636 }
3637
3638 static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
3639 {
3640     tcg_gen_shri_i32(a, a, shift);
3641     tcg_gen_add_i32(d, d, a);
3642 }
3643
3644 static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
3645 {
3646     tcg_gen_shri_i64(a, a, shift);
3647     tcg_gen_add_i64(d, d, a);
3648 }
3649
3650 static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3651 {
3652     tcg_gen_shri_vec(vece, a, a, sh);
3653     tcg_gen_add_vec(vece, d, d, a);
3654 }
3655
3656 void gen_gvec_usra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3657                    int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3658 {
3659     static const TCGOpcode vecop_list[] = {
3660         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3661     };
3662     static const GVecGen2i ops[4] = {
3663         { .fni8 = gen_usra8_i64,
3664           .fniv = gen_usra_vec,
3665           .fno = gen_helper_gvec_usra_b,
3666           .load_dest = true,
3667           .opt_opc = vecop_list,
3668           .vece = MO_8, },
3669         { .fni8 = gen_usra16_i64,
3670           .fniv = gen_usra_vec,
3671           .fno = gen_helper_gvec_usra_h,
3672           .load_dest = true,
3673           .opt_opc = vecop_list,
3674           .vece = MO_16, },
3675         { .fni4 = gen_usra32_i32,
3676           .fniv = gen_usra_vec,
3677           .fno = gen_helper_gvec_usra_s,
3678           .load_dest = true,
3679           .opt_opc = vecop_list,
3680           .vece = MO_32, },
3681         { .fni8 = gen_usra64_i64,
3682           .fniv = gen_usra_vec,
3683           .fno = gen_helper_gvec_usra_d,
3684           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3685           .load_dest = true,
3686           .opt_opc = vecop_list,
3687           .vece = MO_64, },
3688     };
3689
3690     /* tszimm encoding produces immediates in the range [1..esize]. */
3691     tcg_debug_assert(shift > 0);
3692     tcg_debug_assert(shift <= (8 << vece));
3693
3694     /*
3695      * Shifts larger than the element size are architecturally valid.
3696      * Unsigned results in all zeros as input to accumulate: nop.
3697      */
3698     if (shift < (8 << vece)) {
3699         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3700     } else {
3701         /* Nop, but we do need to clear the tail. */
3702         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3703     }
3704 }
3705
3706 /*
3707  * Shift one less than the requested amount, and the low bit is
3708  * the rounding bit.  For the 8 and 16-bit operations, because we
3709  * mask the low bit, we can perform a normal integer shift instead
3710  * of a vector shift.
3711  */
3712 static void gen_srshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3713 {
3714     TCGv_i64 t = tcg_temp_new_i64();
3715
3716     tcg_gen_shri_i64(t, a, sh - 1);
3717     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3718     tcg_gen_vec_sar8i_i64(d, a, sh);
3719     tcg_gen_vec_add8_i64(d, d, t);
3720     tcg_temp_free_i64(t);
3721 }
3722
3723 static void gen_srshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3724 {
3725     TCGv_i64 t = tcg_temp_new_i64();
3726
3727     tcg_gen_shri_i64(t, a, sh - 1);
3728     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3729     tcg_gen_vec_sar16i_i64(d, a, sh);
3730     tcg_gen_vec_add16_i64(d, d, t);
3731     tcg_temp_free_i64(t);
3732 }
3733
3734 static void gen_srshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3735 {
3736     TCGv_i32 t = tcg_temp_new_i32();
3737
3738     tcg_gen_extract_i32(t, a, sh - 1, 1);
3739     tcg_gen_sari_i32(d, a, sh);
3740     tcg_gen_add_i32(d, d, t);
3741     tcg_temp_free_i32(t);
3742 }
3743
3744 static void gen_srshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3745 {
3746     TCGv_i64 t = tcg_temp_new_i64();
3747
3748     tcg_gen_extract_i64(t, a, sh - 1, 1);
3749     tcg_gen_sari_i64(d, a, sh);
3750     tcg_gen_add_i64(d, d, t);
3751     tcg_temp_free_i64(t);
3752 }
3753
3754 static void gen_srshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3755 {
3756     TCGv_vec t = tcg_temp_new_vec_matching(d);
3757     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3758
3759     tcg_gen_shri_vec(vece, t, a, sh - 1);
3760     tcg_gen_dupi_vec(vece, ones, 1);
3761     tcg_gen_and_vec(vece, t, t, ones);
3762     tcg_gen_sari_vec(vece, d, a, sh);
3763     tcg_gen_add_vec(vece, d, d, t);
3764
3765     tcg_temp_free_vec(t);
3766     tcg_temp_free_vec(ones);
3767 }
3768
3769 void gen_gvec_srshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3770                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3771 {
3772     static const TCGOpcode vecop_list[] = {
3773         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3774     };
3775     static const GVecGen2i ops[4] = {
3776         { .fni8 = gen_srshr8_i64,
3777           .fniv = gen_srshr_vec,
3778           .fno = gen_helper_gvec_srshr_b,
3779           .opt_opc = vecop_list,
3780           .vece = MO_8 },
3781         { .fni8 = gen_srshr16_i64,
3782           .fniv = gen_srshr_vec,
3783           .fno = gen_helper_gvec_srshr_h,
3784           .opt_opc = vecop_list,
3785           .vece = MO_16 },
3786         { .fni4 = gen_srshr32_i32,
3787           .fniv = gen_srshr_vec,
3788           .fno = gen_helper_gvec_srshr_s,
3789           .opt_opc = vecop_list,
3790           .vece = MO_32 },
3791         { .fni8 = gen_srshr64_i64,
3792           .fniv = gen_srshr_vec,
3793           .fno = gen_helper_gvec_srshr_d,
3794           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3795           .opt_opc = vecop_list,
3796           .vece = MO_64 },
3797     };
3798
3799     /* tszimm encoding produces immediates in the range [1..esize] */
3800     tcg_debug_assert(shift > 0);
3801     tcg_debug_assert(shift <= (8 << vece));
3802
3803     if (shift == (8 << vece)) {
3804         /*
3805          * Shifts larger than the element size are architecturally valid.
3806          * Signed results in all sign bits.  With rounding, this produces
3807          *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3808          * I.e. always zero.
3809          */
3810         tcg_gen_gvec_dup_imm(vece, rd_ofs, opr_sz, max_sz, 0);
3811     } else {
3812         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3813     }
3814 }
3815
3816 static void gen_srsra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3817 {
3818     TCGv_i64 t = tcg_temp_new_i64();
3819
3820     gen_srshr8_i64(t, a, sh);
3821     tcg_gen_vec_add8_i64(d, d, t);
3822     tcg_temp_free_i64(t);
3823 }
3824
3825 static void gen_srsra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3826 {
3827     TCGv_i64 t = tcg_temp_new_i64();
3828
3829     gen_srshr16_i64(t, a, sh);
3830     tcg_gen_vec_add16_i64(d, d, t);
3831     tcg_temp_free_i64(t);
3832 }
3833
3834 static void gen_srsra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3835 {
3836     TCGv_i32 t = tcg_temp_new_i32();
3837
3838     gen_srshr32_i32(t, a, sh);
3839     tcg_gen_add_i32(d, d, t);
3840     tcg_temp_free_i32(t);
3841 }
3842
3843 static void gen_srsra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3844 {
3845     TCGv_i64 t = tcg_temp_new_i64();
3846
3847     gen_srshr64_i64(t, a, sh);
3848     tcg_gen_add_i64(d, d, t);
3849     tcg_temp_free_i64(t);
3850 }
3851
3852 static void gen_srsra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
3853 {
3854     TCGv_vec t = tcg_temp_new_vec_matching(d);
3855
3856     gen_srshr_vec(vece, t, a, sh);
3857     tcg_gen_add_vec(vece, d, d, t);
3858     tcg_temp_free_vec(t);
3859 }
3860
3861 void gen_gvec_srsra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3862                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3863 {
3864     static const TCGOpcode vecop_list[] = {
3865         INDEX_op_shri_vec, INDEX_op_sari_vec, INDEX_op_add_vec, 0
3866     };
3867     static const GVecGen2i ops[4] = {
3868         { .fni8 = gen_srsra8_i64,
3869           .fniv = gen_srsra_vec,
3870           .fno = gen_helper_gvec_srsra_b,
3871           .opt_opc = vecop_list,
3872           .load_dest = true,
3873           .vece = MO_8 },
3874         { .fni8 = gen_srsra16_i64,
3875           .fniv = gen_srsra_vec,
3876           .fno = gen_helper_gvec_srsra_h,
3877           .opt_opc = vecop_list,
3878           .load_dest = true,
3879           .vece = MO_16 },
3880         { .fni4 = gen_srsra32_i32,
3881           .fniv = gen_srsra_vec,
3882           .fno = gen_helper_gvec_srsra_s,
3883           .opt_opc = vecop_list,
3884           .load_dest = true,
3885           .vece = MO_32 },
3886         { .fni8 = gen_srsra64_i64,
3887           .fniv = gen_srsra_vec,
3888           .fno = gen_helper_gvec_srsra_d,
3889           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3890           .opt_opc = vecop_list,
3891           .load_dest = true,
3892           .vece = MO_64 },
3893     };
3894
3895     /* tszimm encoding produces immediates in the range [1..esize] */
3896     tcg_debug_assert(shift > 0);
3897     tcg_debug_assert(shift <= (8 << vece));
3898
3899     /*
3900      * Shifts larger than the element size are architecturally valid.
3901      * Signed results in all sign bits.  With rounding, this produces
3902      *   (-1 + 1) >> 1 == 0, or (0 + 1) >> 1 == 0.
3903      * I.e. always zero.  With accumulation, this leaves D unchanged.
3904      */
3905     if (shift == (8 << vece)) {
3906         /* Nop, but we do need to clear the tail. */
3907         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
3908     } else {
3909         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
3910     }
3911 }
3912
3913 static void gen_urshr8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3914 {
3915     TCGv_i64 t = tcg_temp_new_i64();
3916
3917     tcg_gen_shri_i64(t, a, sh - 1);
3918     tcg_gen_andi_i64(t, t, dup_const(MO_8, 1));
3919     tcg_gen_vec_shr8i_i64(d, a, sh);
3920     tcg_gen_vec_add8_i64(d, d, t);
3921     tcg_temp_free_i64(t);
3922 }
3923
3924 static void gen_urshr16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3925 {
3926     TCGv_i64 t = tcg_temp_new_i64();
3927
3928     tcg_gen_shri_i64(t, a, sh - 1);
3929     tcg_gen_andi_i64(t, t, dup_const(MO_16, 1));
3930     tcg_gen_vec_shr16i_i64(d, a, sh);
3931     tcg_gen_vec_add16_i64(d, d, t);
3932     tcg_temp_free_i64(t);
3933 }
3934
3935 static void gen_urshr32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
3936 {
3937     TCGv_i32 t = tcg_temp_new_i32();
3938
3939     tcg_gen_extract_i32(t, a, sh - 1, 1);
3940     tcg_gen_shri_i32(d, a, sh);
3941     tcg_gen_add_i32(d, d, t);
3942     tcg_temp_free_i32(t);
3943 }
3944
3945 static void gen_urshr64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
3946 {
3947     TCGv_i64 t = tcg_temp_new_i64();
3948
3949     tcg_gen_extract_i64(t, a, sh - 1, 1);
3950     tcg_gen_shri_i64(d, a, sh);
3951     tcg_gen_add_i64(d, d, t);
3952     tcg_temp_free_i64(t);
3953 }
3954
3955 static void gen_urshr_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t shift)
3956 {
3957     TCGv_vec t = tcg_temp_new_vec_matching(d);
3958     TCGv_vec ones = tcg_temp_new_vec_matching(d);
3959
3960     tcg_gen_shri_vec(vece, t, a, shift - 1);
3961     tcg_gen_dupi_vec(vece, ones, 1);
3962     tcg_gen_and_vec(vece, t, t, ones);
3963     tcg_gen_shri_vec(vece, d, a, shift);
3964     tcg_gen_add_vec(vece, d, d, t);
3965
3966     tcg_temp_free_vec(t);
3967     tcg_temp_free_vec(ones);
3968 }
3969
3970 void gen_gvec_urshr(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
3971                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
3972 {
3973     static const TCGOpcode vecop_list[] = {
3974         INDEX_op_shri_vec, INDEX_op_add_vec, 0
3975     };
3976     static const GVecGen2i ops[4] = {
3977         { .fni8 = gen_urshr8_i64,
3978           .fniv = gen_urshr_vec,
3979           .fno = gen_helper_gvec_urshr_b,
3980           .opt_opc = vecop_list,
3981           .vece = MO_8 },
3982         { .fni8 = gen_urshr16_i64,
3983           .fniv = gen_urshr_vec,
3984           .fno = gen_helper_gvec_urshr_h,
3985           .opt_opc = vecop_list,
3986           .vece = MO_16 },
3987         { .fni4 = gen_urshr32_i32,
3988           .fniv = gen_urshr_vec,
3989           .fno = gen_helper_gvec_urshr_s,
3990           .opt_opc = vecop_list,
3991           .vece = MO_32 },
3992         { .fni8 = gen_urshr64_i64,
3993           .fniv = gen_urshr_vec,
3994           .fno = gen_helper_gvec_urshr_d,
3995           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
3996           .opt_opc = vecop_list,
3997           .vece = MO_64 },
3998     };
3999
4000     /* tszimm encoding produces immediates in the range [1..esize] */
4001     tcg_debug_assert(shift > 0);
4002     tcg_debug_assert(shift <= (8 << vece));
4003
4004     if (shift == (8 << vece)) {
4005         /*
4006          * Shifts larger than the element size are architecturally valid.
4007          * Unsigned results in zero.  With rounding, this produces a
4008          * copy of the most significant bit.
4009          */
4010         tcg_gen_gvec_shri(vece, rd_ofs, rm_ofs, shift - 1, opr_sz, max_sz);
4011     } else {
4012         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4013     }
4014 }
4015
4016 static void gen_ursra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4017 {
4018     TCGv_i64 t = tcg_temp_new_i64();
4019
4020     if (sh == 8) {
4021         tcg_gen_vec_shr8i_i64(t, a, 7);
4022     } else {
4023         gen_urshr8_i64(t, a, sh);
4024     }
4025     tcg_gen_vec_add8_i64(d, d, t);
4026     tcg_temp_free_i64(t);
4027 }
4028
4029 static void gen_ursra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4030 {
4031     TCGv_i64 t = tcg_temp_new_i64();
4032
4033     if (sh == 16) {
4034         tcg_gen_vec_shr16i_i64(t, a, 15);
4035     } else {
4036         gen_urshr16_i64(t, a, sh);
4037     }
4038     tcg_gen_vec_add16_i64(d, d, t);
4039     tcg_temp_free_i64(t);
4040 }
4041
4042 static void gen_ursra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t sh)
4043 {
4044     TCGv_i32 t = tcg_temp_new_i32();
4045
4046     if (sh == 32) {
4047         tcg_gen_shri_i32(t, a, 31);
4048     } else {
4049         gen_urshr32_i32(t, a, sh);
4050     }
4051     tcg_gen_add_i32(d, d, t);
4052     tcg_temp_free_i32(t);
4053 }
4054
4055 static void gen_ursra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t sh)
4056 {
4057     TCGv_i64 t = tcg_temp_new_i64();
4058
4059     if (sh == 64) {
4060         tcg_gen_shri_i64(t, a, 63);
4061     } else {
4062         gen_urshr64_i64(t, a, sh);
4063     }
4064     tcg_gen_add_i64(d, d, t);
4065     tcg_temp_free_i64(t);
4066 }
4067
4068 static void gen_ursra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4069 {
4070     TCGv_vec t = tcg_temp_new_vec_matching(d);
4071
4072     if (sh == (8 << vece)) {
4073         tcg_gen_shri_vec(vece, t, a, sh - 1);
4074     } else {
4075         gen_urshr_vec(vece, t, a, sh);
4076     }
4077     tcg_gen_add_vec(vece, d, d, t);
4078     tcg_temp_free_vec(t);
4079 }
4080
4081 void gen_gvec_ursra(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4082                     int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4083 {
4084     static const TCGOpcode vecop_list[] = {
4085         INDEX_op_shri_vec, INDEX_op_add_vec, 0
4086     };
4087     static const GVecGen2i ops[4] = {
4088         { .fni8 = gen_ursra8_i64,
4089           .fniv = gen_ursra_vec,
4090           .fno = gen_helper_gvec_ursra_b,
4091           .opt_opc = vecop_list,
4092           .load_dest = true,
4093           .vece = MO_8 },
4094         { .fni8 = gen_ursra16_i64,
4095           .fniv = gen_ursra_vec,
4096           .fno = gen_helper_gvec_ursra_h,
4097           .opt_opc = vecop_list,
4098           .load_dest = true,
4099           .vece = MO_16 },
4100         { .fni4 = gen_ursra32_i32,
4101           .fniv = gen_ursra_vec,
4102           .fno = gen_helper_gvec_ursra_s,
4103           .opt_opc = vecop_list,
4104           .load_dest = true,
4105           .vece = MO_32 },
4106         { .fni8 = gen_ursra64_i64,
4107           .fniv = gen_ursra_vec,
4108           .fno = gen_helper_gvec_ursra_d,
4109           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4110           .opt_opc = vecop_list,
4111           .load_dest = true,
4112           .vece = MO_64 },
4113     };
4114
4115     /* tszimm encoding produces immediates in the range [1..esize] */
4116     tcg_debug_assert(shift > 0);
4117     tcg_debug_assert(shift <= (8 << vece));
4118
4119     tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4120 }
4121
4122 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4123 {
4124     uint64_t mask = dup_const(MO_8, 0xff >> shift);
4125     TCGv_i64 t = tcg_temp_new_i64();
4126
4127     tcg_gen_shri_i64(t, a, shift);
4128     tcg_gen_andi_i64(t, t, mask);
4129     tcg_gen_andi_i64(d, d, ~mask);
4130     tcg_gen_or_i64(d, d, t);
4131     tcg_temp_free_i64(t);
4132 }
4133
4134 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4135 {
4136     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
4137     TCGv_i64 t = tcg_temp_new_i64();
4138
4139     tcg_gen_shri_i64(t, a, shift);
4140     tcg_gen_andi_i64(t, t, mask);
4141     tcg_gen_andi_i64(d, d, ~mask);
4142     tcg_gen_or_i64(d, d, t);
4143     tcg_temp_free_i64(t);
4144 }
4145
4146 static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4147 {
4148     tcg_gen_shri_i32(a, a, shift);
4149     tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
4150 }
4151
4152 static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4153 {
4154     tcg_gen_shri_i64(a, a, shift);
4155     tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
4156 }
4157
4158 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4159 {
4160     TCGv_vec t = tcg_temp_new_vec_matching(d);
4161     TCGv_vec m = tcg_temp_new_vec_matching(d);
4162
4163     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((8 << vece) - sh, sh));
4164     tcg_gen_shri_vec(vece, t, a, sh);
4165     tcg_gen_and_vec(vece, d, d, m);
4166     tcg_gen_or_vec(vece, d, d, t);
4167
4168     tcg_temp_free_vec(t);
4169     tcg_temp_free_vec(m);
4170 }
4171
4172 void gen_gvec_sri(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4173                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4174 {
4175     static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
4176     const GVecGen2i ops[4] = {
4177         { .fni8 = gen_shr8_ins_i64,
4178           .fniv = gen_shr_ins_vec,
4179           .fno = gen_helper_gvec_sri_b,
4180           .load_dest = true,
4181           .opt_opc = vecop_list,
4182           .vece = MO_8 },
4183         { .fni8 = gen_shr16_ins_i64,
4184           .fniv = gen_shr_ins_vec,
4185           .fno = gen_helper_gvec_sri_h,
4186           .load_dest = true,
4187           .opt_opc = vecop_list,
4188           .vece = MO_16 },
4189         { .fni4 = gen_shr32_ins_i32,
4190           .fniv = gen_shr_ins_vec,
4191           .fno = gen_helper_gvec_sri_s,
4192           .load_dest = true,
4193           .opt_opc = vecop_list,
4194           .vece = MO_32 },
4195         { .fni8 = gen_shr64_ins_i64,
4196           .fniv = gen_shr_ins_vec,
4197           .fno = gen_helper_gvec_sri_d,
4198           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4199           .load_dest = true,
4200           .opt_opc = vecop_list,
4201           .vece = MO_64 },
4202     };
4203
4204     /* tszimm encoding produces immediates in the range [1..esize]. */
4205     tcg_debug_assert(shift > 0);
4206     tcg_debug_assert(shift <= (8 << vece));
4207
4208     /* Shift of esize leaves destination unchanged. */
4209     if (shift < (8 << vece)) {
4210         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4211     } else {
4212         /* Nop, but we do need to clear the tail. */
4213         tcg_gen_gvec_mov(vece, rd_ofs, rd_ofs, opr_sz, max_sz);
4214     }
4215 }
4216
4217 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4218 {
4219     uint64_t mask = dup_const(MO_8, 0xff << shift);
4220     TCGv_i64 t = tcg_temp_new_i64();
4221
4222     tcg_gen_shli_i64(t, a, shift);
4223     tcg_gen_andi_i64(t, t, mask);
4224     tcg_gen_andi_i64(d, d, ~mask);
4225     tcg_gen_or_i64(d, d, t);
4226     tcg_temp_free_i64(t);
4227 }
4228
4229 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4230 {
4231     uint64_t mask = dup_const(MO_16, 0xffff << shift);
4232     TCGv_i64 t = tcg_temp_new_i64();
4233
4234     tcg_gen_shli_i64(t, a, shift);
4235     tcg_gen_andi_i64(t, t, mask);
4236     tcg_gen_andi_i64(d, d, ~mask);
4237     tcg_gen_or_i64(d, d, t);
4238     tcg_temp_free_i64(t);
4239 }
4240
4241 static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
4242 {
4243     tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
4244 }
4245
4246 static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4247 {
4248     tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
4249 }
4250
4251 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4252 {
4253     TCGv_vec t = tcg_temp_new_vec_matching(d);
4254     TCGv_vec m = tcg_temp_new_vec_matching(d);
4255
4256     tcg_gen_shli_vec(vece, t, a, sh);
4257     tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(0, sh));
4258     tcg_gen_and_vec(vece, d, d, m);
4259     tcg_gen_or_vec(vece, d, d, t);
4260
4261     tcg_temp_free_vec(t);
4262     tcg_temp_free_vec(m);
4263 }
4264
4265 void gen_gvec_sli(unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
4266                   int64_t shift, uint32_t opr_sz, uint32_t max_sz)
4267 {
4268     static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
4269     const GVecGen2i ops[4] = {
4270         { .fni8 = gen_shl8_ins_i64,
4271           .fniv = gen_shl_ins_vec,
4272           .fno = gen_helper_gvec_sli_b,
4273           .load_dest = true,
4274           .opt_opc = vecop_list,
4275           .vece = MO_8 },
4276         { .fni8 = gen_shl16_ins_i64,
4277           .fniv = gen_shl_ins_vec,
4278           .fno = gen_helper_gvec_sli_h,
4279           .load_dest = true,
4280           .opt_opc = vecop_list,
4281           .vece = MO_16 },
4282         { .fni4 = gen_shl32_ins_i32,
4283           .fniv = gen_shl_ins_vec,
4284           .fno = gen_helper_gvec_sli_s,
4285           .load_dest = true,
4286           .opt_opc = vecop_list,
4287           .vece = MO_32 },
4288         { .fni8 = gen_shl64_ins_i64,
4289           .fniv = gen_shl_ins_vec,
4290           .fno = gen_helper_gvec_sli_d,
4291           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4292           .load_dest = true,
4293           .opt_opc = vecop_list,
4294           .vece = MO_64 },
4295     };
4296
4297     /* tszimm encoding produces immediates in the range [0..esize-1]. */
4298     tcg_debug_assert(shift >= 0);
4299     tcg_debug_assert(shift < (8 << vece));
4300
4301     if (shift == 0) {
4302         tcg_gen_gvec_mov(vece, rd_ofs, rm_ofs, opr_sz, max_sz);
4303     } else {
4304         tcg_gen_gvec_2i(rd_ofs, rm_ofs, opr_sz, max_sz, shift, &ops[vece]);
4305     }
4306 }
4307
4308 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4309 {
4310     gen_helper_neon_mul_u8(a, a, b);
4311     gen_helper_neon_add_u8(d, d, a);
4312 }
4313
4314 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4315 {
4316     gen_helper_neon_mul_u8(a, a, b);
4317     gen_helper_neon_sub_u8(d, d, a);
4318 }
4319
4320 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4321 {
4322     gen_helper_neon_mul_u16(a, a, b);
4323     gen_helper_neon_add_u16(d, d, a);
4324 }
4325
4326 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4327 {
4328     gen_helper_neon_mul_u16(a, a, b);
4329     gen_helper_neon_sub_u16(d, d, a);
4330 }
4331
4332 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4333 {
4334     tcg_gen_mul_i32(a, a, b);
4335     tcg_gen_add_i32(d, d, a);
4336 }
4337
4338 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4339 {
4340     tcg_gen_mul_i32(a, a, b);
4341     tcg_gen_sub_i32(d, d, a);
4342 }
4343
4344 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4345 {
4346     tcg_gen_mul_i64(a, a, b);
4347     tcg_gen_add_i64(d, d, a);
4348 }
4349
4350 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4351 {
4352     tcg_gen_mul_i64(a, a, b);
4353     tcg_gen_sub_i64(d, d, a);
4354 }
4355
4356 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4357 {
4358     tcg_gen_mul_vec(vece, a, a, b);
4359     tcg_gen_add_vec(vece, d, d, a);
4360 }
4361
4362 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4363 {
4364     tcg_gen_mul_vec(vece, a, a, b);
4365     tcg_gen_sub_vec(vece, d, d, a);
4366 }
4367
4368 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
4369  * these tables are shared with AArch64 which does support them.
4370  */
4371 void gen_gvec_mla(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4372                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4373 {
4374     static const TCGOpcode vecop_list[] = {
4375         INDEX_op_mul_vec, INDEX_op_add_vec, 0
4376     };
4377     static const GVecGen3 ops[4] = {
4378         { .fni4 = gen_mla8_i32,
4379           .fniv = gen_mla_vec,
4380           .load_dest = true,
4381           .opt_opc = vecop_list,
4382           .vece = MO_8 },
4383         { .fni4 = gen_mla16_i32,
4384           .fniv = gen_mla_vec,
4385           .load_dest = true,
4386           .opt_opc = vecop_list,
4387           .vece = MO_16 },
4388         { .fni4 = gen_mla32_i32,
4389           .fniv = gen_mla_vec,
4390           .load_dest = true,
4391           .opt_opc = vecop_list,
4392           .vece = MO_32 },
4393         { .fni8 = gen_mla64_i64,
4394           .fniv = gen_mla_vec,
4395           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4396           .load_dest = true,
4397           .opt_opc = vecop_list,
4398           .vece = MO_64 },
4399     };
4400     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4401 }
4402
4403 void gen_gvec_mls(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4404                   uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4405 {
4406     static const TCGOpcode vecop_list[] = {
4407         INDEX_op_mul_vec, INDEX_op_sub_vec, 0
4408     };
4409     static const GVecGen3 ops[4] = {
4410         { .fni4 = gen_mls8_i32,
4411           .fniv = gen_mls_vec,
4412           .load_dest = true,
4413           .opt_opc = vecop_list,
4414           .vece = MO_8 },
4415         { .fni4 = gen_mls16_i32,
4416           .fniv = gen_mls_vec,
4417           .load_dest = true,
4418           .opt_opc = vecop_list,
4419           .vece = MO_16 },
4420         { .fni4 = gen_mls32_i32,
4421           .fniv = gen_mls_vec,
4422           .load_dest = true,
4423           .opt_opc = vecop_list,
4424           .vece = MO_32 },
4425         { .fni8 = gen_mls64_i64,
4426           .fniv = gen_mls_vec,
4427           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4428           .load_dest = true,
4429           .opt_opc = vecop_list,
4430           .vece = MO_64 },
4431     };
4432     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4433 }
4434
4435 /* CMTST : test is "if (X & Y != 0)". */
4436 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4437 {
4438     tcg_gen_and_i32(d, a, b);
4439     tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
4440     tcg_gen_neg_i32(d, d);
4441 }
4442
4443 void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4444 {
4445     tcg_gen_and_i64(d, a, b);
4446     tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
4447     tcg_gen_neg_i64(d, d);
4448 }
4449
4450 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4451 {
4452     tcg_gen_and_vec(vece, d, a, b);
4453     tcg_gen_dupi_vec(vece, a, 0);
4454     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4455 }
4456
4457 void gen_gvec_cmtst(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4458                     uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4459 {
4460     static const TCGOpcode vecop_list[] = { INDEX_op_cmp_vec, 0 };
4461     static const GVecGen3 ops[4] = {
4462         { .fni4 = gen_helper_neon_tst_u8,
4463           .fniv = gen_cmtst_vec,
4464           .opt_opc = vecop_list,
4465           .vece = MO_8 },
4466         { .fni4 = gen_helper_neon_tst_u16,
4467           .fniv = gen_cmtst_vec,
4468           .opt_opc = vecop_list,
4469           .vece = MO_16 },
4470         { .fni4 = gen_cmtst_i32,
4471           .fniv = gen_cmtst_vec,
4472           .opt_opc = vecop_list,
4473           .vece = MO_32 },
4474         { .fni8 = gen_cmtst_i64,
4475           .fniv = gen_cmtst_vec,
4476           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4477           .opt_opc = vecop_list,
4478           .vece = MO_64 },
4479     };
4480     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4481 }
4482
4483 void gen_ushl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4484 {
4485     TCGv_i32 lval = tcg_temp_new_i32();
4486     TCGv_i32 rval = tcg_temp_new_i32();
4487     TCGv_i32 lsh = tcg_temp_new_i32();
4488     TCGv_i32 rsh = tcg_temp_new_i32();
4489     TCGv_i32 zero = tcg_const_i32(0);
4490     TCGv_i32 max = tcg_const_i32(32);
4491
4492     /*
4493      * Rely on the TCG guarantee that out of range shifts produce
4494      * unspecified results, not undefined behaviour (i.e. no trap).
4495      * Discard out-of-range results after the fact.
4496      */
4497     tcg_gen_ext8s_i32(lsh, shift);
4498     tcg_gen_neg_i32(rsh, lsh);
4499     tcg_gen_shl_i32(lval, src, lsh);
4500     tcg_gen_shr_i32(rval, src, rsh);
4501     tcg_gen_movcond_i32(TCG_COND_LTU, dst, lsh, max, lval, zero);
4502     tcg_gen_movcond_i32(TCG_COND_LTU, dst, rsh, max, rval, dst);
4503
4504     tcg_temp_free_i32(lval);
4505     tcg_temp_free_i32(rval);
4506     tcg_temp_free_i32(lsh);
4507     tcg_temp_free_i32(rsh);
4508     tcg_temp_free_i32(zero);
4509     tcg_temp_free_i32(max);
4510 }
4511
4512 void gen_ushl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4513 {
4514     TCGv_i64 lval = tcg_temp_new_i64();
4515     TCGv_i64 rval = tcg_temp_new_i64();
4516     TCGv_i64 lsh = tcg_temp_new_i64();
4517     TCGv_i64 rsh = tcg_temp_new_i64();
4518     TCGv_i64 zero = tcg_const_i64(0);
4519     TCGv_i64 max = tcg_const_i64(64);
4520
4521     /*
4522      * Rely on the TCG guarantee that out of range shifts produce
4523      * unspecified results, not undefined behaviour (i.e. no trap).
4524      * Discard out-of-range results after the fact.
4525      */
4526     tcg_gen_ext8s_i64(lsh, shift);
4527     tcg_gen_neg_i64(rsh, lsh);
4528     tcg_gen_shl_i64(lval, src, lsh);
4529     tcg_gen_shr_i64(rval, src, rsh);
4530     tcg_gen_movcond_i64(TCG_COND_LTU, dst, lsh, max, lval, zero);
4531     tcg_gen_movcond_i64(TCG_COND_LTU, dst, rsh, max, rval, dst);
4532
4533     tcg_temp_free_i64(lval);
4534     tcg_temp_free_i64(rval);
4535     tcg_temp_free_i64(lsh);
4536     tcg_temp_free_i64(rsh);
4537     tcg_temp_free_i64(zero);
4538     tcg_temp_free_i64(max);
4539 }
4540
4541 static void gen_ushl_vec(unsigned vece, TCGv_vec dst,
4542                          TCGv_vec src, TCGv_vec shift)
4543 {
4544     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4545     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4546     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4547     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4548     TCGv_vec msk, max;
4549
4550     tcg_gen_neg_vec(vece, rsh, shift);
4551     if (vece == MO_8) {
4552         tcg_gen_mov_vec(lsh, shift);
4553     } else {
4554         msk = tcg_temp_new_vec_matching(dst);
4555         tcg_gen_dupi_vec(vece, msk, 0xff);
4556         tcg_gen_and_vec(vece, lsh, shift, msk);
4557         tcg_gen_and_vec(vece, rsh, rsh, msk);
4558         tcg_temp_free_vec(msk);
4559     }
4560
4561     /*
4562      * Rely on the TCG guarantee that out of range shifts produce
4563      * unspecified results, not undefined behaviour (i.e. no trap).
4564      * Discard out-of-range results after the fact.
4565      */
4566     tcg_gen_shlv_vec(vece, lval, src, lsh);
4567     tcg_gen_shrv_vec(vece, rval, src, rsh);
4568
4569     max = tcg_temp_new_vec_matching(dst);
4570     tcg_gen_dupi_vec(vece, max, 8 << vece);
4571
4572     /*
4573      * The choice of LT (signed) and GEU (unsigned) are biased toward
4574      * the instructions of the x86_64 host.  For MO_8, the whole byte
4575      * is significant so we must use an unsigned compare; otherwise we
4576      * have already masked to a byte and so a signed compare works.
4577      * Other tcg hosts have a full set of comparisons and do not care.
4578      */
4579     if (vece == MO_8) {
4580         tcg_gen_cmp_vec(TCG_COND_GEU, vece, lsh, lsh, max);
4581         tcg_gen_cmp_vec(TCG_COND_GEU, vece, rsh, rsh, max);
4582         tcg_gen_andc_vec(vece, lval, lval, lsh);
4583         tcg_gen_andc_vec(vece, rval, rval, rsh);
4584     } else {
4585         tcg_gen_cmp_vec(TCG_COND_LT, vece, lsh, lsh, max);
4586         tcg_gen_cmp_vec(TCG_COND_LT, vece, rsh, rsh, max);
4587         tcg_gen_and_vec(vece, lval, lval, lsh);
4588         tcg_gen_and_vec(vece, rval, rval, rsh);
4589     }
4590     tcg_gen_or_vec(vece, dst, lval, rval);
4591
4592     tcg_temp_free_vec(max);
4593     tcg_temp_free_vec(lval);
4594     tcg_temp_free_vec(rval);
4595     tcg_temp_free_vec(lsh);
4596     tcg_temp_free_vec(rsh);
4597 }
4598
4599 void gen_gvec_ushl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4600                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4601 {
4602     static const TCGOpcode vecop_list[] = {
4603         INDEX_op_neg_vec, INDEX_op_shlv_vec,
4604         INDEX_op_shrv_vec, INDEX_op_cmp_vec, 0
4605     };
4606     static const GVecGen3 ops[4] = {
4607         { .fniv = gen_ushl_vec,
4608           .fno = gen_helper_gvec_ushl_b,
4609           .opt_opc = vecop_list,
4610           .vece = MO_8 },
4611         { .fniv = gen_ushl_vec,
4612           .fno = gen_helper_gvec_ushl_h,
4613           .opt_opc = vecop_list,
4614           .vece = MO_16 },
4615         { .fni4 = gen_ushl_i32,
4616           .fniv = gen_ushl_vec,
4617           .opt_opc = vecop_list,
4618           .vece = MO_32 },
4619         { .fni8 = gen_ushl_i64,
4620           .fniv = gen_ushl_vec,
4621           .opt_opc = vecop_list,
4622           .vece = MO_64 },
4623     };
4624     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4625 }
4626
4627 void gen_sshl_i32(TCGv_i32 dst, TCGv_i32 src, TCGv_i32 shift)
4628 {
4629     TCGv_i32 lval = tcg_temp_new_i32();
4630     TCGv_i32 rval = tcg_temp_new_i32();
4631     TCGv_i32 lsh = tcg_temp_new_i32();
4632     TCGv_i32 rsh = tcg_temp_new_i32();
4633     TCGv_i32 zero = tcg_const_i32(0);
4634     TCGv_i32 max = tcg_const_i32(31);
4635
4636     /*
4637      * Rely on the TCG guarantee that out of range shifts produce
4638      * unspecified results, not undefined behaviour (i.e. no trap).
4639      * Discard out-of-range results after the fact.
4640      */
4641     tcg_gen_ext8s_i32(lsh, shift);
4642     tcg_gen_neg_i32(rsh, lsh);
4643     tcg_gen_shl_i32(lval, src, lsh);
4644     tcg_gen_umin_i32(rsh, rsh, max);
4645     tcg_gen_sar_i32(rval, src, rsh);
4646     tcg_gen_movcond_i32(TCG_COND_LEU, lval, lsh, max, lval, zero);
4647     tcg_gen_movcond_i32(TCG_COND_LT, dst, lsh, zero, rval, lval);
4648
4649     tcg_temp_free_i32(lval);
4650     tcg_temp_free_i32(rval);
4651     tcg_temp_free_i32(lsh);
4652     tcg_temp_free_i32(rsh);
4653     tcg_temp_free_i32(zero);
4654     tcg_temp_free_i32(max);
4655 }
4656
4657 void gen_sshl_i64(TCGv_i64 dst, TCGv_i64 src, TCGv_i64 shift)
4658 {
4659     TCGv_i64 lval = tcg_temp_new_i64();
4660     TCGv_i64 rval = tcg_temp_new_i64();
4661     TCGv_i64 lsh = tcg_temp_new_i64();
4662     TCGv_i64 rsh = tcg_temp_new_i64();
4663     TCGv_i64 zero = tcg_const_i64(0);
4664     TCGv_i64 max = tcg_const_i64(63);
4665
4666     /*
4667      * Rely on the TCG guarantee that out of range shifts produce
4668      * unspecified results, not undefined behaviour (i.e. no trap).
4669      * Discard out-of-range results after the fact.
4670      */
4671     tcg_gen_ext8s_i64(lsh, shift);
4672     tcg_gen_neg_i64(rsh, lsh);
4673     tcg_gen_shl_i64(lval, src, lsh);
4674     tcg_gen_umin_i64(rsh, rsh, max);
4675     tcg_gen_sar_i64(rval, src, rsh);
4676     tcg_gen_movcond_i64(TCG_COND_LEU, lval, lsh, max, lval, zero);
4677     tcg_gen_movcond_i64(TCG_COND_LT, dst, lsh, zero, rval, lval);
4678
4679     tcg_temp_free_i64(lval);
4680     tcg_temp_free_i64(rval);
4681     tcg_temp_free_i64(lsh);
4682     tcg_temp_free_i64(rsh);
4683     tcg_temp_free_i64(zero);
4684     tcg_temp_free_i64(max);
4685 }
4686
4687 static void gen_sshl_vec(unsigned vece, TCGv_vec dst,
4688                          TCGv_vec src, TCGv_vec shift)
4689 {
4690     TCGv_vec lval = tcg_temp_new_vec_matching(dst);
4691     TCGv_vec rval = tcg_temp_new_vec_matching(dst);
4692     TCGv_vec lsh = tcg_temp_new_vec_matching(dst);
4693     TCGv_vec rsh = tcg_temp_new_vec_matching(dst);
4694     TCGv_vec tmp = tcg_temp_new_vec_matching(dst);
4695
4696     /*
4697      * Rely on the TCG guarantee that out of range shifts produce
4698      * unspecified results, not undefined behaviour (i.e. no trap).
4699      * Discard out-of-range results after the fact.
4700      */
4701     tcg_gen_neg_vec(vece, rsh, shift);
4702     if (vece == MO_8) {
4703         tcg_gen_mov_vec(lsh, shift);
4704     } else {
4705         tcg_gen_dupi_vec(vece, tmp, 0xff);
4706         tcg_gen_and_vec(vece, lsh, shift, tmp);
4707         tcg_gen_and_vec(vece, rsh, rsh, tmp);
4708     }
4709
4710     /* Bound rsh so out of bound right shift gets -1.  */
4711     tcg_gen_dupi_vec(vece, tmp, (8 << vece) - 1);
4712     tcg_gen_umin_vec(vece, rsh, rsh, tmp);
4713     tcg_gen_cmp_vec(TCG_COND_GT, vece, tmp, lsh, tmp);
4714
4715     tcg_gen_shlv_vec(vece, lval, src, lsh);
4716     tcg_gen_sarv_vec(vece, rval, src, rsh);
4717
4718     /* Select in-bound left shift.  */
4719     tcg_gen_andc_vec(vece, lval, lval, tmp);
4720
4721     /* Select between left and right shift.  */
4722     if (vece == MO_8) {
4723         tcg_gen_dupi_vec(vece, tmp, 0);
4724         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, rval, lval);
4725     } else {
4726         tcg_gen_dupi_vec(vece, tmp, 0x80);
4727         tcg_gen_cmpsel_vec(TCG_COND_LT, vece, dst, lsh, tmp, lval, rval);
4728     }
4729
4730     tcg_temp_free_vec(lval);
4731     tcg_temp_free_vec(rval);
4732     tcg_temp_free_vec(lsh);
4733     tcg_temp_free_vec(rsh);
4734     tcg_temp_free_vec(tmp);
4735 }
4736
4737 void gen_gvec_sshl(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4738                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4739 {
4740     static const TCGOpcode vecop_list[] = {
4741         INDEX_op_neg_vec, INDEX_op_umin_vec, INDEX_op_shlv_vec,
4742         INDEX_op_sarv_vec, INDEX_op_cmp_vec, INDEX_op_cmpsel_vec, 0
4743     };
4744     static const GVecGen3 ops[4] = {
4745         { .fniv = gen_sshl_vec,
4746           .fno = gen_helper_gvec_sshl_b,
4747           .opt_opc = vecop_list,
4748           .vece = MO_8 },
4749         { .fniv = gen_sshl_vec,
4750           .fno = gen_helper_gvec_sshl_h,
4751           .opt_opc = vecop_list,
4752           .vece = MO_16 },
4753         { .fni4 = gen_sshl_i32,
4754           .fniv = gen_sshl_vec,
4755           .opt_opc = vecop_list,
4756           .vece = MO_32 },
4757         { .fni8 = gen_sshl_i64,
4758           .fniv = gen_sshl_vec,
4759           .opt_opc = vecop_list,
4760           .vece = MO_64 },
4761     };
4762     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4763 }
4764
4765 static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4766                           TCGv_vec a, TCGv_vec b)
4767 {
4768     TCGv_vec x = tcg_temp_new_vec_matching(t);
4769     tcg_gen_add_vec(vece, x, a, b);
4770     tcg_gen_usadd_vec(vece, t, a, b);
4771     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4772     tcg_gen_or_vec(vece, sat, sat, x);
4773     tcg_temp_free_vec(x);
4774 }
4775
4776 void gen_gvec_uqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4777                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4778 {
4779     static const TCGOpcode vecop_list[] = {
4780         INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4781     };
4782     static const GVecGen4 ops[4] = {
4783         { .fniv = gen_uqadd_vec,
4784           .fno = gen_helper_gvec_uqadd_b,
4785           .write_aofs = true,
4786           .opt_opc = vecop_list,
4787           .vece = MO_8 },
4788         { .fniv = gen_uqadd_vec,
4789           .fno = gen_helper_gvec_uqadd_h,
4790           .write_aofs = true,
4791           .opt_opc = vecop_list,
4792           .vece = MO_16 },
4793         { .fniv = gen_uqadd_vec,
4794           .fno = gen_helper_gvec_uqadd_s,
4795           .write_aofs = true,
4796           .opt_opc = vecop_list,
4797           .vece = MO_32 },
4798         { .fniv = gen_uqadd_vec,
4799           .fno = gen_helper_gvec_uqadd_d,
4800           .write_aofs = true,
4801           .opt_opc = vecop_list,
4802           .vece = MO_64 },
4803     };
4804     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4805                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4806 }
4807
4808 static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4809                           TCGv_vec a, TCGv_vec b)
4810 {
4811     TCGv_vec x = tcg_temp_new_vec_matching(t);
4812     tcg_gen_add_vec(vece, x, a, b);
4813     tcg_gen_ssadd_vec(vece, t, a, b);
4814     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4815     tcg_gen_or_vec(vece, sat, sat, x);
4816     tcg_temp_free_vec(x);
4817 }
4818
4819 void gen_gvec_sqadd_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4820                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4821 {
4822     static const TCGOpcode vecop_list[] = {
4823         INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
4824     };
4825     static const GVecGen4 ops[4] = {
4826         { .fniv = gen_sqadd_vec,
4827           .fno = gen_helper_gvec_sqadd_b,
4828           .opt_opc = vecop_list,
4829           .write_aofs = true,
4830           .vece = MO_8 },
4831         { .fniv = gen_sqadd_vec,
4832           .fno = gen_helper_gvec_sqadd_h,
4833           .opt_opc = vecop_list,
4834           .write_aofs = true,
4835           .vece = MO_16 },
4836         { .fniv = gen_sqadd_vec,
4837           .fno = gen_helper_gvec_sqadd_s,
4838           .opt_opc = vecop_list,
4839           .write_aofs = true,
4840           .vece = MO_32 },
4841         { .fniv = gen_sqadd_vec,
4842           .fno = gen_helper_gvec_sqadd_d,
4843           .opt_opc = vecop_list,
4844           .write_aofs = true,
4845           .vece = MO_64 },
4846     };
4847     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4848                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4849 }
4850
4851 static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4852                           TCGv_vec a, TCGv_vec b)
4853 {
4854     TCGv_vec x = tcg_temp_new_vec_matching(t);
4855     tcg_gen_sub_vec(vece, x, a, b);
4856     tcg_gen_ussub_vec(vece, t, a, b);
4857     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4858     tcg_gen_or_vec(vece, sat, sat, x);
4859     tcg_temp_free_vec(x);
4860 }
4861
4862 void gen_gvec_uqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4863                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4864 {
4865     static const TCGOpcode vecop_list[] = {
4866         INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4867     };
4868     static const GVecGen4 ops[4] = {
4869         { .fniv = gen_uqsub_vec,
4870           .fno = gen_helper_gvec_uqsub_b,
4871           .opt_opc = vecop_list,
4872           .write_aofs = true,
4873           .vece = MO_8 },
4874         { .fniv = gen_uqsub_vec,
4875           .fno = gen_helper_gvec_uqsub_h,
4876           .opt_opc = vecop_list,
4877           .write_aofs = true,
4878           .vece = MO_16 },
4879         { .fniv = gen_uqsub_vec,
4880           .fno = gen_helper_gvec_uqsub_s,
4881           .opt_opc = vecop_list,
4882           .write_aofs = true,
4883           .vece = MO_32 },
4884         { .fniv = gen_uqsub_vec,
4885           .fno = gen_helper_gvec_uqsub_d,
4886           .opt_opc = vecop_list,
4887           .write_aofs = true,
4888           .vece = MO_64 },
4889     };
4890     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4891                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4892 }
4893
4894 static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4895                           TCGv_vec a, TCGv_vec b)
4896 {
4897     TCGv_vec x = tcg_temp_new_vec_matching(t);
4898     tcg_gen_sub_vec(vece, x, a, b);
4899     tcg_gen_sssub_vec(vece, t, a, b);
4900     tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4901     tcg_gen_or_vec(vece, sat, sat, x);
4902     tcg_temp_free_vec(x);
4903 }
4904
4905 void gen_gvec_sqsub_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4906                        uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4907 {
4908     static const TCGOpcode vecop_list[] = {
4909         INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
4910     };
4911     static const GVecGen4 ops[4] = {
4912         { .fniv = gen_sqsub_vec,
4913           .fno = gen_helper_gvec_sqsub_b,
4914           .opt_opc = vecop_list,
4915           .write_aofs = true,
4916           .vece = MO_8 },
4917         { .fniv = gen_sqsub_vec,
4918           .fno = gen_helper_gvec_sqsub_h,
4919           .opt_opc = vecop_list,
4920           .write_aofs = true,
4921           .vece = MO_16 },
4922         { .fniv = gen_sqsub_vec,
4923           .fno = gen_helper_gvec_sqsub_s,
4924           .opt_opc = vecop_list,
4925           .write_aofs = true,
4926           .vece = MO_32 },
4927         { .fniv = gen_sqsub_vec,
4928           .fno = gen_helper_gvec_sqsub_d,
4929           .opt_opc = vecop_list,
4930           .write_aofs = true,
4931           .vece = MO_64 },
4932     };
4933     tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4934                    rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4935 }
4936
4937 static void gen_sabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4938 {
4939     TCGv_i32 t = tcg_temp_new_i32();
4940
4941     tcg_gen_sub_i32(t, a, b);
4942     tcg_gen_sub_i32(d, b, a);
4943     tcg_gen_movcond_i32(TCG_COND_LT, d, a, b, d, t);
4944     tcg_temp_free_i32(t);
4945 }
4946
4947 static void gen_sabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4948 {
4949     TCGv_i64 t = tcg_temp_new_i64();
4950
4951     tcg_gen_sub_i64(t, a, b);
4952     tcg_gen_sub_i64(d, b, a);
4953     tcg_gen_movcond_i64(TCG_COND_LT, d, a, b, d, t);
4954     tcg_temp_free_i64(t);
4955 }
4956
4957 static void gen_sabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4958 {
4959     TCGv_vec t = tcg_temp_new_vec_matching(d);
4960
4961     tcg_gen_smin_vec(vece, t, a, b);
4962     tcg_gen_smax_vec(vece, d, a, b);
4963     tcg_gen_sub_vec(vece, d, d, t);
4964     tcg_temp_free_vec(t);
4965 }
4966
4967 void gen_gvec_sabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
4968                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
4969 {
4970     static const TCGOpcode vecop_list[] = {
4971         INDEX_op_sub_vec, INDEX_op_smin_vec, INDEX_op_smax_vec, 0
4972     };
4973     static const GVecGen3 ops[4] = {
4974         { .fniv = gen_sabd_vec,
4975           .fno = gen_helper_gvec_sabd_b,
4976           .opt_opc = vecop_list,
4977           .vece = MO_8 },
4978         { .fniv = gen_sabd_vec,
4979           .fno = gen_helper_gvec_sabd_h,
4980           .opt_opc = vecop_list,
4981           .vece = MO_16 },
4982         { .fni4 = gen_sabd_i32,
4983           .fniv = gen_sabd_vec,
4984           .fno = gen_helper_gvec_sabd_s,
4985           .opt_opc = vecop_list,
4986           .vece = MO_32 },
4987         { .fni8 = gen_sabd_i64,
4988           .fniv = gen_sabd_vec,
4989           .fno = gen_helper_gvec_sabd_d,
4990           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
4991           .opt_opc = vecop_list,
4992           .vece = MO_64 },
4993     };
4994     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
4995 }
4996
4997 static void gen_uabd_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4998 {
4999     TCGv_i32 t = tcg_temp_new_i32();
5000
5001     tcg_gen_sub_i32(t, a, b);
5002     tcg_gen_sub_i32(d, b, a);
5003     tcg_gen_movcond_i32(TCG_COND_LTU, d, a, b, d, t);
5004     tcg_temp_free_i32(t);
5005 }
5006
5007 static void gen_uabd_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5008 {
5009     TCGv_i64 t = tcg_temp_new_i64();
5010
5011     tcg_gen_sub_i64(t, a, b);
5012     tcg_gen_sub_i64(d, b, a);
5013     tcg_gen_movcond_i64(TCG_COND_LTU, d, a, b, d, t);
5014     tcg_temp_free_i64(t);
5015 }
5016
5017 static void gen_uabd_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5018 {
5019     TCGv_vec t = tcg_temp_new_vec_matching(d);
5020
5021     tcg_gen_umin_vec(vece, t, a, b);
5022     tcg_gen_umax_vec(vece, d, a, b);
5023     tcg_gen_sub_vec(vece, d, d, t);
5024     tcg_temp_free_vec(t);
5025 }
5026
5027 void gen_gvec_uabd(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5028                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5029 {
5030     static const TCGOpcode vecop_list[] = {
5031         INDEX_op_sub_vec, INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5032     };
5033     static const GVecGen3 ops[4] = {
5034         { .fniv = gen_uabd_vec,
5035           .fno = gen_helper_gvec_uabd_b,
5036           .opt_opc = vecop_list,
5037           .vece = MO_8 },
5038         { .fniv = gen_uabd_vec,
5039           .fno = gen_helper_gvec_uabd_h,
5040           .opt_opc = vecop_list,
5041           .vece = MO_16 },
5042         { .fni4 = gen_uabd_i32,
5043           .fniv = gen_uabd_vec,
5044           .fno = gen_helper_gvec_uabd_s,
5045           .opt_opc = vecop_list,
5046           .vece = MO_32 },
5047         { .fni8 = gen_uabd_i64,
5048           .fniv = gen_uabd_vec,
5049           .fno = gen_helper_gvec_uabd_d,
5050           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5051           .opt_opc = vecop_list,
5052           .vece = MO_64 },
5053     };
5054     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5055 }
5056
5057 static void gen_saba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5058 {
5059     TCGv_i32 t = tcg_temp_new_i32();
5060     gen_sabd_i32(t, a, b);
5061     tcg_gen_add_i32(d, d, t);
5062     tcg_temp_free_i32(t);
5063 }
5064
5065 static void gen_saba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5066 {
5067     TCGv_i64 t = tcg_temp_new_i64();
5068     gen_sabd_i64(t, a, b);
5069     tcg_gen_add_i64(d, d, t);
5070     tcg_temp_free_i64(t);
5071 }
5072
5073 static void gen_saba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5074 {
5075     TCGv_vec t = tcg_temp_new_vec_matching(d);
5076     gen_sabd_vec(vece, t, a, b);
5077     tcg_gen_add_vec(vece, d, d, t);
5078     tcg_temp_free_vec(t);
5079 }
5080
5081 void gen_gvec_saba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5082                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5083 {
5084     static const TCGOpcode vecop_list[] = {
5085         INDEX_op_sub_vec, INDEX_op_add_vec,
5086         INDEX_op_smin_vec, INDEX_op_smax_vec, 0
5087     };
5088     static const GVecGen3 ops[4] = {
5089         { .fniv = gen_saba_vec,
5090           .fno = gen_helper_gvec_saba_b,
5091           .opt_opc = vecop_list,
5092           .load_dest = true,
5093           .vece = MO_8 },
5094         { .fniv = gen_saba_vec,
5095           .fno = gen_helper_gvec_saba_h,
5096           .opt_opc = vecop_list,
5097           .load_dest = true,
5098           .vece = MO_16 },
5099         { .fni4 = gen_saba_i32,
5100           .fniv = gen_saba_vec,
5101           .fno = gen_helper_gvec_saba_s,
5102           .opt_opc = vecop_list,
5103           .load_dest = true,
5104           .vece = MO_32 },
5105         { .fni8 = gen_saba_i64,
5106           .fniv = gen_saba_vec,
5107           .fno = gen_helper_gvec_saba_d,
5108           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5109           .opt_opc = vecop_list,
5110           .load_dest = true,
5111           .vece = MO_64 },
5112     };
5113     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5114 }
5115
5116 static void gen_uaba_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
5117 {
5118     TCGv_i32 t = tcg_temp_new_i32();
5119     gen_uabd_i32(t, a, b);
5120     tcg_gen_add_i32(d, d, t);
5121     tcg_temp_free_i32(t);
5122 }
5123
5124 static void gen_uaba_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
5125 {
5126     TCGv_i64 t = tcg_temp_new_i64();
5127     gen_uabd_i64(t, a, b);
5128     tcg_gen_add_i64(d, d, t);
5129     tcg_temp_free_i64(t);
5130 }
5131
5132 static void gen_uaba_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
5133 {
5134     TCGv_vec t = tcg_temp_new_vec_matching(d);
5135     gen_uabd_vec(vece, t, a, b);
5136     tcg_gen_add_vec(vece, d, d, t);
5137     tcg_temp_free_vec(t);
5138 }
5139
5140 void gen_gvec_uaba(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
5141                    uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz)
5142 {
5143     static const TCGOpcode vecop_list[] = {
5144         INDEX_op_sub_vec, INDEX_op_add_vec,
5145         INDEX_op_umin_vec, INDEX_op_umax_vec, 0
5146     };
5147     static const GVecGen3 ops[4] = {
5148         { .fniv = gen_uaba_vec,
5149           .fno = gen_helper_gvec_uaba_b,
5150           .opt_opc = vecop_list,
5151           .load_dest = true,
5152           .vece = MO_8 },
5153         { .fniv = gen_uaba_vec,
5154           .fno = gen_helper_gvec_uaba_h,
5155           .opt_opc = vecop_list,
5156           .load_dest = true,
5157           .vece = MO_16 },
5158         { .fni4 = gen_uaba_i32,
5159           .fniv = gen_uaba_vec,
5160           .fno = gen_helper_gvec_uaba_s,
5161           .opt_opc = vecop_list,
5162           .load_dest = true,
5163           .vece = MO_32 },
5164         { .fni8 = gen_uaba_i64,
5165           .fniv = gen_uaba_vec,
5166           .fno = gen_helper_gvec_uaba_d,
5167           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
5168           .opt_opc = vecop_list,
5169           .load_dest = true,
5170           .vece = MO_64 },
5171     };
5172     tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, &ops[vece]);
5173 }
5174
5175 /* Translate a NEON data processing instruction.  Return nonzero if the
5176    instruction is invalid.
5177    We process data in a mixture of 32-bit and 64-bit chunks.
5178    Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
5179
5180 static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
5181 {
5182     int op;
5183     int q;
5184     int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
5185     int size;
5186     int pass;
5187     int u;
5188     int vec_size;
5189     uint32_t imm;
5190     TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
5191     TCGv_ptr ptr1;
5192     TCGv_i64 tmp64;
5193
5194     if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
5195         return 1;
5196     }
5197
5198     /* FIXME: this access check should not take precedence over UNDEF
5199      * for invalid encodings; we will generate incorrect syndrome information
5200      * for attempts to execute invalid vfp/neon encodings with FP disabled.
5201      */
5202     if (s->fp_excp_el) {
5203         gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
5204                            syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
5205         return 0;
5206     }
5207
5208     if (!s->vfp_enabled)
5209       return 1;
5210     q = (insn & (1 << 6)) != 0;
5211     u = (insn >> 24) & 1;
5212     VFP_DREG_D(rd, insn);
5213     VFP_DREG_N(rn, insn);
5214     VFP_DREG_M(rm, insn);
5215     size = (insn >> 20) & 3;
5216     vec_size = q ? 16 : 8;
5217     rd_ofs = neon_reg_offset(rd, 0);
5218     rn_ofs = neon_reg_offset(rn, 0);
5219     rm_ofs = neon_reg_offset(rm, 0);
5220
5221     if ((insn & (1 << 23)) == 0) {
5222         /* Three register same length: handled by decodetree */
5223         return 1;
5224     } else if (insn & (1 << 4)) {
5225         /* Two registers and shift or reg and imm: handled by decodetree */
5226         return 1;
5227     } else { /* (insn & 0x00800010 == 0x00800000) */
5228         if (size != 3) {
5229             op = (insn >> 8) & 0xf;
5230             if ((insn & (1 << 6)) == 0) {
5231                 /* Three registers of different lengths.  */
5232                 /* undefreq: bit 0 : UNDEF if size == 0
5233                  *           bit 1 : UNDEF if size == 1
5234                  *           bit 2 : UNDEF if size == 2
5235                  *           bit 3 : UNDEF if U == 1
5236                  * Note that [2:0] set implies 'always UNDEF'
5237                  */
5238                 int undefreq;
5239                 /* prewiden, src1_wide, src2_wide, undefreq */
5240                 static const int neon_3reg_wide[16][4] = {
5241                     {0, 0, 0, 7}, /* VADDL: handled by decodetree */
5242                     {0, 0, 0, 7}, /* VADDW: handled by decodetree */
5243                     {0, 0, 0, 7}, /* VSUBL: handled by decodetree */
5244                     {0, 0, 0, 7}, /* VSUBW: handled by decodetree */
5245                     {0, 0, 0, 7}, /* VADDHN: handled by decodetree */
5246                     {0, 0, 0, 7}, /* VABAL */
5247                     {0, 0, 0, 7}, /* VSUBHN: handled by decodetree */
5248                     {0, 0, 0, 7}, /* VABDL */
5249                     {0, 0, 0, 7}, /* VMLAL */
5250                     {0, 0, 0, 7}, /* VQDMLAL */
5251                     {0, 0, 0, 7}, /* VMLSL */
5252                     {0, 0, 0, 7}, /* VQDMLSL */
5253                     {0, 0, 0, 7}, /* Integer VMULL */
5254                     {0, 0, 0, 7}, /* VQDMULL */
5255                     {0, 0, 0, 0xa}, /* Polynomial VMULL */
5256                     {0, 0, 0, 7}, /* Reserved: always UNDEF */
5257                 };
5258
5259                 undefreq = neon_3reg_wide[op][3];
5260
5261                 if ((undefreq & (1 << size)) ||
5262                     ((undefreq & 8) && u)) {
5263                     return 1;
5264                 }
5265                 if (rd & 1) {
5266                     return 1;
5267                 }
5268
5269                 /* Handle polynomial VMULL in a single pass.  */
5270                 if (op == 14) {
5271                     if (size == 0) {
5272                         /* VMULL.P8 */
5273                         tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5274                                            0, gen_helper_neon_pmull_h);
5275                     } else {
5276                         /* VMULL.P64 */
5277                         if (!dc_isar_feature(aa32_pmull, s)) {
5278                             return 1;
5279                         }
5280                         tcg_gen_gvec_3_ool(rd_ofs, rn_ofs, rm_ofs, 16, 16,
5281                                            0, gen_helper_gvec_pmull_q);
5282                     }
5283                     return 0;
5284                 }
5285                 abort(); /* all others handled by decodetree */
5286             } else {
5287                 /* Two registers and a scalar. NB that for ops of this form
5288                  * the ARM ARM labels bit 24 as Q, but it is in our variable
5289                  * 'u', not 'q'.
5290                  */
5291                 if (size == 0) {
5292                     return 1;
5293                 }
5294                 switch (op) {
5295                 case 1: /* Float VMLA scalar */
5296                 case 5: /* Floating point VMLS scalar */
5297                 case 9: /* Floating point VMUL scalar */
5298                     if (size == 1) {
5299                         return 1;
5300                     }
5301                     /* fall through */
5302                 case 0: /* Integer VMLA scalar */
5303                 case 4: /* Integer VMLS scalar */
5304                 case 8: /* Integer VMUL scalar */
5305                 case 12: /* VQDMULH scalar */
5306                 case 13: /* VQRDMULH scalar */
5307                     if (u && ((rd | rn) & 1)) {
5308                         return 1;
5309                     }
5310                     tmp = neon_get_scalar(size, rm);
5311                     neon_store_scratch(0, tmp);
5312                     for (pass = 0; pass < (u ? 4 : 2); pass++) {
5313                         tmp = neon_load_scratch(0);
5314                         tmp2 = neon_load_reg(rn, pass);
5315                         if (op == 12) {
5316                             if (size == 1) {
5317                                 gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5318                             } else {
5319                                 gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5320                             }
5321                         } else if (op == 13) {
5322                             if (size == 1) {
5323                                 gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5324                             } else {
5325                                 gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5326                             }
5327                         } else if (op & 1) {
5328                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5329                             gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5330                             tcg_temp_free_ptr(fpstatus);
5331                         } else {
5332                             switch (size) {
5333                             case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5334                             case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5335                             case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5336                             default: abort();
5337                             }
5338                         }
5339                         tcg_temp_free_i32(tmp2);
5340                         if (op < 8) {
5341                             /* Accumulate.  */
5342                             tmp2 = neon_load_reg(rd, pass);
5343                             switch (op) {
5344                             case 0:
5345                                 gen_neon_add(size, tmp, tmp2);
5346                                 break;
5347                             case 1:
5348                             {
5349                                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5350                                 gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5351                                 tcg_temp_free_ptr(fpstatus);
5352                                 break;
5353                             }
5354                             case 4:
5355                                 gen_neon_rsb(size, tmp, tmp2);
5356                                 break;
5357                             case 5:
5358                             {
5359                                 TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5360                                 gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5361                                 tcg_temp_free_ptr(fpstatus);
5362                                 break;
5363                             }
5364                             default:
5365                                 abort();
5366                             }
5367                             tcg_temp_free_i32(tmp2);
5368                         }
5369                         neon_store_reg(rd, pass, tmp);
5370                     }
5371                     break;
5372                 case 3: /* VQDMLAL scalar */
5373                 case 7: /* VQDMLSL scalar */
5374                 case 11: /* VQDMULL scalar */
5375                     if (u == 1) {
5376                         return 1;
5377                     }
5378                     /* fall through */
5379                 case 2: /* VMLAL sclar */
5380                 case 6: /* VMLSL scalar */
5381                 case 10: /* VMULL scalar */
5382                     if (rd & 1) {
5383                         return 1;
5384                     }
5385                     tmp2 = neon_get_scalar(size, rm);
5386                     /* We need a copy of tmp2 because gen_neon_mull
5387                      * deletes it during pass 0.  */
5388                     tmp4 = tcg_temp_new_i32();
5389                     tcg_gen_mov_i32(tmp4, tmp2);
5390                     tmp3 = neon_load_reg(rn, 1);
5391
5392                     for (pass = 0; pass < 2; pass++) {
5393                         if (pass == 0) {
5394                             tmp = neon_load_reg(rn, 0);
5395                         } else {
5396                             tmp = tmp3;
5397                             tmp2 = tmp4;
5398                         }
5399                         gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5400                         if (op != 11) {
5401                             neon_load_reg64(cpu_V1, rd + pass);
5402                         }
5403                         switch (op) {
5404                         case 6:
5405                             gen_neon_negl(cpu_V0, size);
5406                             /* Fall through */
5407                         case 2:
5408                             gen_neon_addl(size);
5409                             break;
5410                         case 3: case 7:
5411                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5412                             if (op == 7) {
5413                                 gen_neon_negl(cpu_V0, size);
5414                             }
5415                             gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5416                             break;
5417                         case 10:
5418                             /* no-op */
5419                             break;
5420                         case 11:
5421                             gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5422                             break;
5423                         default:
5424                             abort();
5425                         }
5426                         neon_store_reg64(cpu_V0, rd + pass);
5427                     }
5428                     break;
5429                 case 14: /* VQRDMLAH scalar */
5430                 case 15: /* VQRDMLSH scalar */
5431                     {
5432                         NeonGenThreeOpEnvFn *fn;
5433
5434                         if (!dc_isar_feature(aa32_rdm, s)) {
5435                             return 1;
5436                         }
5437                         if (u && ((rd | rn) & 1)) {
5438                             return 1;
5439                         }
5440                         if (op == 14) {
5441                             if (size == 1) {
5442                                 fn = gen_helper_neon_qrdmlah_s16;
5443                             } else {
5444                                 fn = gen_helper_neon_qrdmlah_s32;
5445                             }
5446                         } else {
5447                             if (size == 1) {
5448                                 fn = gen_helper_neon_qrdmlsh_s16;
5449                             } else {
5450                                 fn = gen_helper_neon_qrdmlsh_s32;
5451                             }
5452                         }
5453
5454                         tmp2 = neon_get_scalar(size, rm);
5455                         for (pass = 0; pass < (u ? 4 : 2); pass++) {
5456                             tmp = neon_load_reg(rn, pass);
5457                             tmp3 = neon_load_reg(rd, pass);
5458                             fn(tmp, cpu_env, tmp, tmp2, tmp3);
5459                             tcg_temp_free_i32(tmp3);
5460                             neon_store_reg(rd, pass, tmp);
5461                         }
5462                         tcg_temp_free_i32(tmp2);
5463                     }
5464                     break;
5465                 default:
5466                     g_assert_not_reached();
5467                 }
5468             }
5469         } else { /* size == 3 */
5470             if (!u) {
5471                 /* Extract.  */
5472                 imm = (insn >> 8) & 0xf;
5473
5474                 if (imm > 7 && !q)
5475                     return 1;
5476
5477                 if (q && ((rd | rn | rm) & 1)) {
5478                     return 1;
5479                 }
5480
5481                 if (imm == 0) {
5482                     neon_load_reg64(cpu_V0, rn);
5483                     if (q) {
5484                         neon_load_reg64(cpu_V1, rn + 1);
5485                     }
5486                 } else if (imm == 8) {
5487                     neon_load_reg64(cpu_V0, rn + 1);
5488                     if (q) {
5489                         neon_load_reg64(cpu_V1, rm);
5490                     }
5491                 } else if (q) {
5492                     tmp64 = tcg_temp_new_i64();
5493                     if (imm < 8) {
5494                         neon_load_reg64(cpu_V0, rn);
5495                         neon_load_reg64(tmp64, rn + 1);
5496                     } else {
5497                         neon_load_reg64(cpu_V0, rn + 1);
5498                         neon_load_reg64(tmp64, rm);
5499                     }
5500                     tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
5501                     tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
5502                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5503                     if (imm < 8) {
5504                         neon_load_reg64(cpu_V1, rm);
5505                     } else {
5506                         neon_load_reg64(cpu_V1, rm + 1);
5507                         imm -= 8;
5508                     }
5509                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5510                     tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
5511                     tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
5512                     tcg_temp_free_i64(tmp64);
5513                 } else {
5514                     /* BUGFIX */
5515                     neon_load_reg64(cpu_V0, rn);
5516                     tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
5517                     neon_load_reg64(cpu_V1, rm);
5518                     tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5519                     tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5520                 }
5521                 neon_store_reg64(cpu_V0, rd);
5522                 if (q) {
5523                     neon_store_reg64(cpu_V1, rd + 1);
5524                 }
5525             } else if ((insn & (1 << 11)) == 0) {
5526                 /* Two register misc.  */
5527                 op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
5528                 size = (insn >> 18) & 3;
5529                 /* UNDEF for unknown op values and bad op-size combinations */
5530                 if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
5531                     return 1;
5532                 }
5533                 if (neon_2rm_is_v8_op(op) &&
5534                     !arm_dc_feature(s, ARM_FEATURE_V8)) {
5535                     return 1;
5536                 }
5537                 if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
5538                     q && ((rm | rd) & 1)) {
5539                     return 1;
5540                 }
5541                 switch (op) {
5542                 case NEON_2RM_VREV64:
5543                     for (pass = 0; pass < (q ? 2 : 1); pass++) {
5544                         tmp = neon_load_reg(rm, pass * 2);
5545                         tmp2 = neon_load_reg(rm, pass * 2 + 1);
5546                         switch (size) {
5547                         case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5548                         case 1: gen_swap_half(tmp); break;
5549                         case 2: /* no-op */ break;
5550                         default: abort();
5551                         }
5552                         neon_store_reg(rd, pass * 2 + 1, tmp);
5553                         if (size == 2) {
5554                             neon_store_reg(rd, pass * 2, tmp2);
5555                         } else {
5556                             switch (size) {
5557                             case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
5558                             case 1: gen_swap_half(tmp2); break;
5559                             default: abort();
5560                             }
5561                             neon_store_reg(rd, pass * 2, tmp2);
5562                         }
5563                     }
5564                     break;
5565                 case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
5566                 case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
5567                     for (pass = 0; pass < q + 1; pass++) {
5568                         tmp = neon_load_reg(rm, pass * 2);
5569                         gen_neon_widen(cpu_V0, tmp, size, op & 1);
5570                         tmp = neon_load_reg(rm, pass * 2 + 1);
5571                         gen_neon_widen(cpu_V1, tmp, size, op & 1);
5572                         switch (size) {
5573                         case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
5574                         case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
5575                         case 2: tcg_gen_add_i64(CPU_V001); break;
5576                         default: abort();
5577                         }
5578                         if (op >= NEON_2RM_VPADAL) {
5579                             /* Accumulate.  */
5580                             neon_load_reg64(cpu_V1, rd + pass);
5581                             gen_neon_addl(size);
5582                         }
5583                         neon_store_reg64(cpu_V0, rd + pass);
5584                     }
5585                     break;
5586                 case NEON_2RM_VTRN:
5587                     if (size == 2) {
5588                         int n;
5589                         for (n = 0; n < (q ? 4 : 2); n += 2) {
5590                             tmp = neon_load_reg(rm, n);
5591                             tmp2 = neon_load_reg(rd, n + 1);
5592                             neon_store_reg(rm, n, tmp2);
5593                             neon_store_reg(rd, n + 1, tmp);
5594                         }
5595                     } else {
5596                         goto elementwise;
5597                     }
5598                     break;
5599                 case NEON_2RM_VUZP:
5600                     if (gen_neon_unzip(rd, rm, size, q)) {
5601                         return 1;
5602                     }
5603                     break;
5604                 case NEON_2RM_VZIP:
5605                     if (gen_neon_zip(rd, rm, size, q)) {
5606                         return 1;
5607                     }
5608                     break;
5609                 case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
5610                     /* also VQMOVUN; op field and mnemonics don't line up */
5611                     if (rm & 1) {
5612                         return 1;
5613                     }
5614                     tmp2 = NULL;
5615                     for (pass = 0; pass < 2; pass++) {
5616                         neon_load_reg64(cpu_V0, rm + pass);
5617                         tmp = tcg_temp_new_i32();
5618                         gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
5619                                            tmp, cpu_V0);
5620                         if (pass == 0) {
5621                             tmp2 = tmp;
5622                         } else {
5623                             neon_store_reg(rd, 0, tmp2);
5624                             neon_store_reg(rd, 1, tmp);
5625                         }
5626                     }
5627                     break;
5628                 case NEON_2RM_VSHLL:
5629                     if (q || (rd & 1)) {
5630                         return 1;
5631                     }
5632                     tmp = neon_load_reg(rm, 0);
5633                     tmp2 = neon_load_reg(rm, 1);
5634                     for (pass = 0; pass < 2; pass++) {
5635                         if (pass == 1)
5636                             tmp = tmp2;
5637                         gen_neon_widen(cpu_V0, tmp, size, 1);
5638                         tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
5639                         neon_store_reg64(cpu_V0, rd + pass);
5640                     }
5641                     break;
5642                 case NEON_2RM_VCVT_F16_F32:
5643                 {
5644                     TCGv_ptr fpst;
5645                     TCGv_i32 ahp;
5646
5647                     if (!dc_isar_feature(aa32_fp16_spconv, s) ||
5648                         q || (rm & 1)) {
5649                         return 1;
5650                     }
5651                     fpst = get_fpstatus_ptr(true);
5652                     ahp = get_ahp_flag();
5653                     tmp = neon_load_reg(rm, 0);
5654                     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
5655                     tmp2 = neon_load_reg(rm, 1);
5656                     gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
5657                     tcg_gen_shli_i32(tmp2, tmp2, 16);
5658                     tcg_gen_or_i32(tmp2, tmp2, tmp);
5659                     tcg_temp_free_i32(tmp);
5660                     tmp = neon_load_reg(rm, 2);
5661                     gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
5662                     tmp3 = neon_load_reg(rm, 3);
5663                     neon_store_reg(rd, 0, tmp2);
5664                     gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
5665                     tcg_gen_shli_i32(tmp3, tmp3, 16);
5666                     tcg_gen_or_i32(tmp3, tmp3, tmp);
5667                     neon_store_reg(rd, 1, tmp3);
5668                     tcg_temp_free_i32(tmp);
5669                     tcg_temp_free_i32(ahp);
5670                     tcg_temp_free_ptr(fpst);
5671                     break;
5672                 }
5673                 case NEON_2RM_VCVT_F32_F16:
5674                 {
5675                     TCGv_ptr fpst;
5676                     TCGv_i32 ahp;
5677                     if (!dc_isar_feature(aa32_fp16_spconv, s) ||
5678                         q || (rd & 1)) {
5679                         return 1;
5680                     }
5681                     fpst = get_fpstatus_ptr(true);
5682                     ahp = get_ahp_flag();
5683                     tmp3 = tcg_temp_new_i32();
5684                     tmp = neon_load_reg(rm, 0);
5685                     tmp2 = neon_load_reg(rm, 1);
5686                     tcg_gen_ext16u_i32(tmp3, tmp);
5687                     gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
5688                     neon_store_reg(rd, 0, tmp3);
5689                     tcg_gen_shri_i32(tmp, tmp, 16);
5690                     gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
5691                     neon_store_reg(rd, 1, tmp);
5692                     tmp3 = tcg_temp_new_i32();
5693                     tcg_gen_ext16u_i32(tmp3, tmp2);
5694                     gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
5695                     neon_store_reg(rd, 2, tmp3);
5696                     tcg_gen_shri_i32(tmp2, tmp2, 16);
5697                     gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
5698                     neon_store_reg(rd, 3, tmp2);
5699                     tcg_temp_free_i32(ahp);
5700                     tcg_temp_free_ptr(fpst);
5701                     break;
5702                 }
5703                 case NEON_2RM_AESE: case NEON_2RM_AESMC:
5704                     if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
5705                         return 1;
5706                     }
5707                     /*
5708                      * Bit 6 is the lowest opcode bit; it distinguishes
5709                      * between encryption (AESE/AESMC) and decryption
5710                      * (AESD/AESIMC).
5711                      */
5712                     if (op == NEON_2RM_AESE) {
5713                         tcg_gen_gvec_3_ool(vfp_reg_offset(true, rd),
5714                                            vfp_reg_offset(true, rd),
5715                                            vfp_reg_offset(true, rm),
5716                                            16, 16, extract32(insn, 6, 1),
5717                                            gen_helper_crypto_aese);
5718                     } else {
5719                         tcg_gen_gvec_2_ool(vfp_reg_offset(true, rd),
5720                                            vfp_reg_offset(true, rm),
5721                                            16, 16, extract32(insn, 6, 1),
5722                                            gen_helper_crypto_aesmc);
5723                     }
5724                     break;
5725                 case NEON_2RM_SHA1H:
5726                     if (!dc_isar_feature(aa32_sha1, s) || ((rm | rd) & 1)) {
5727                         return 1;
5728                     }
5729                     tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
5730                                        gen_helper_crypto_sha1h);
5731                     break;
5732                 case NEON_2RM_SHA1SU1:
5733                     if ((rm | rd) & 1) {
5734                             return 1;
5735                     }
5736                     /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
5737                     if (q) {
5738                         if (!dc_isar_feature(aa32_sha2, s)) {
5739                             return 1;
5740                         }
5741                     } else if (!dc_isar_feature(aa32_sha1, s)) {
5742                         return 1;
5743                     }
5744                     tcg_gen_gvec_2_ool(rd_ofs, rm_ofs, 16, 16, 0,
5745                                        q ? gen_helper_crypto_sha256su0
5746                                        : gen_helper_crypto_sha1su1);
5747                     break;
5748                 case NEON_2RM_VMVN:
5749                     tcg_gen_gvec_not(0, rd_ofs, rm_ofs, vec_size, vec_size);
5750                     break;
5751                 case NEON_2RM_VNEG:
5752                     tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
5753                     break;
5754                 case NEON_2RM_VABS:
5755                     tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
5756                     break;
5757
5758                 case NEON_2RM_VCEQ0:
5759                     gen_gvec_ceq0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5760                     break;
5761                 case NEON_2RM_VCGT0:
5762                     gen_gvec_cgt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5763                     break;
5764                 case NEON_2RM_VCLE0:
5765                     gen_gvec_cle0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5766                     break;
5767                 case NEON_2RM_VCGE0:
5768                     gen_gvec_cge0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5769                     break;
5770                 case NEON_2RM_VCLT0:
5771                     gen_gvec_clt0(size, rd_ofs, rm_ofs, vec_size, vec_size);
5772                     break;
5773
5774                 default:
5775                 elementwise:
5776                     for (pass = 0; pass < (q ? 4 : 2); pass++) {
5777                         tmp = neon_load_reg(rm, pass);
5778                         switch (op) {
5779                         case NEON_2RM_VREV32:
5780                             switch (size) {
5781                             case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5782                             case 1: gen_swap_half(tmp); break;
5783                             default: abort();
5784                             }
5785                             break;
5786                         case NEON_2RM_VREV16:
5787                             gen_rev16(tmp, tmp);
5788                             break;
5789                         case NEON_2RM_VCLS:
5790                             switch (size) {
5791                             case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
5792                             case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
5793                             case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
5794                             default: abort();
5795                             }
5796                             break;
5797                         case NEON_2RM_VCLZ:
5798                             switch (size) {
5799                             case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
5800                             case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
5801                             case 2: tcg_gen_clzi_i32(tmp, tmp, 32); break;
5802                             default: abort();
5803                             }
5804                             break;
5805                         case NEON_2RM_VCNT:
5806                             gen_helper_neon_cnt_u8(tmp, tmp);
5807                             break;
5808                         case NEON_2RM_VQABS:
5809                             switch (size) {
5810                             case 0:
5811                                 gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
5812                                 break;
5813                             case 1:
5814                                 gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
5815                                 break;
5816                             case 2:
5817                                 gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
5818                                 break;
5819                             default: abort();
5820                             }
5821                             break;
5822                         case NEON_2RM_VQNEG:
5823                             switch (size) {
5824                             case 0:
5825                                 gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
5826                                 break;
5827                             case 1:
5828                                 gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
5829                                 break;
5830                             case 2:
5831                                 gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
5832                                 break;
5833                             default: abort();
5834                             }
5835                             break;
5836                         case NEON_2RM_VCGT0_F:
5837                         {
5838                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5839                             tmp2 = tcg_const_i32(0);
5840                             gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5841                             tcg_temp_free_i32(tmp2);
5842                             tcg_temp_free_ptr(fpstatus);
5843                             break;
5844                         }
5845                         case NEON_2RM_VCGE0_F:
5846                         {
5847                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5848                             tmp2 = tcg_const_i32(0);
5849                             gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5850                             tcg_temp_free_i32(tmp2);
5851                             tcg_temp_free_ptr(fpstatus);
5852                             break;
5853                         }
5854                         case NEON_2RM_VCEQ0_F:
5855                         {
5856                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5857                             tmp2 = tcg_const_i32(0);
5858                             gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5859                             tcg_temp_free_i32(tmp2);
5860                             tcg_temp_free_ptr(fpstatus);
5861                             break;
5862                         }
5863                         case NEON_2RM_VCLE0_F:
5864                         {
5865                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5866                             tmp2 = tcg_const_i32(0);
5867                             gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
5868                             tcg_temp_free_i32(tmp2);
5869                             tcg_temp_free_ptr(fpstatus);
5870                             break;
5871                         }
5872                         case NEON_2RM_VCLT0_F:
5873                         {
5874                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5875                             tmp2 = tcg_const_i32(0);
5876                             gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
5877                             tcg_temp_free_i32(tmp2);
5878                             tcg_temp_free_ptr(fpstatus);
5879                             break;
5880                         }
5881                         case NEON_2RM_VABS_F:
5882                             gen_helper_vfp_abss(tmp, tmp);
5883                             break;
5884                         case NEON_2RM_VNEG_F:
5885                             gen_helper_vfp_negs(tmp, tmp);
5886                             break;
5887                         case NEON_2RM_VSWP:
5888                             tmp2 = neon_load_reg(rd, pass);
5889                             neon_store_reg(rm, pass, tmp2);
5890                             break;
5891                         case NEON_2RM_VTRN:
5892                             tmp2 = neon_load_reg(rd, pass);
5893                             switch (size) {
5894                             case 0: gen_neon_trn_u8(tmp, tmp2); break;
5895                             case 1: gen_neon_trn_u16(tmp, tmp2); break;
5896                             default: abort();
5897                             }
5898                             neon_store_reg(rm, pass, tmp2);
5899                             break;
5900                         case NEON_2RM_VRINTN:
5901                         case NEON_2RM_VRINTA:
5902                         case NEON_2RM_VRINTM:
5903                         case NEON_2RM_VRINTP:
5904                         case NEON_2RM_VRINTZ:
5905                         {
5906                             TCGv_i32 tcg_rmode;
5907                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5908                             int rmode;
5909
5910                             if (op == NEON_2RM_VRINTZ) {
5911                                 rmode = FPROUNDING_ZERO;
5912                             } else {
5913                                 rmode = fp_decode_rm[((op & 0x6) >> 1) ^ 1];
5914                             }
5915
5916                             tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5917                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5918                                                       cpu_env);
5919                             gen_helper_rints(tmp, tmp, fpstatus);
5920                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5921                                                       cpu_env);
5922                             tcg_temp_free_ptr(fpstatus);
5923                             tcg_temp_free_i32(tcg_rmode);
5924                             break;
5925                         }
5926                         case NEON_2RM_VRINTX:
5927                         {
5928                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5929                             gen_helper_rints_exact(tmp, tmp, fpstatus);
5930                             tcg_temp_free_ptr(fpstatus);
5931                             break;
5932                         }
5933                         case NEON_2RM_VCVTAU:
5934                         case NEON_2RM_VCVTAS:
5935                         case NEON_2RM_VCVTNU:
5936                         case NEON_2RM_VCVTNS:
5937                         case NEON_2RM_VCVTPU:
5938                         case NEON_2RM_VCVTPS:
5939                         case NEON_2RM_VCVTMU:
5940                         case NEON_2RM_VCVTMS:
5941                         {
5942                             bool is_signed = !extract32(insn, 7, 1);
5943                             TCGv_ptr fpst = get_fpstatus_ptr(1);
5944                             TCGv_i32 tcg_rmode, tcg_shift;
5945                             int rmode = fp_decode_rm[extract32(insn, 8, 2)];
5946
5947                             tcg_shift = tcg_const_i32(0);
5948                             tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5949                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5950                                                       cpu_env);
5951
5952                             if (is_signed) {
5953                                 gen_helper_vfp_tosls(tmp, tmp,
5954                                                      tcg_shift, fpst);
5955                             } else {
5956                                 gen_helper_vfp_touls(tmp, tmp,
5957                                                      tcg_shift, fpst);
5958                             }
5959
5960                             gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
5961                                                       cpu_env);
5962                             tcg_temp_free_i32(tcg_rmode);
5963                             tcg_temp_free_i32(tcg_shift);
5964                             tcg_temp_free_ptr(fpst);
5965                             break;
5966                         }
5967                         case NEON_2RM_VRECPE:
5968                             gen_helper_recpe_u32(tmp, tmp);
5969                             break;
5970                         case NEON_2RM_VRSQRTE:
5971                             gen_helper_rsqrte_u32(tmp, tmp);
5972                             break;
5973                         case NEON_2RM_VRECPE_F:
5974                         {
5975                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5976                             gen_helper_recpe_f32(tmp, tmp, fpstatus);
5977                             tcg_temp_free_ptr(fpstatus);
5978                             break;
5979                         }
5980                         case NEON_2RM_VRSQRTE_F:
5981                         {
5982                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5983                             gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
5984                             tcg_temp_free_ptr(fpstatus);
5985                             break;
5986                         }
5987                         case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
5988                         {
5989                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5990                             gen_helper_vfp_sitos(tmp, tmp, fpstatus);
5991                             tcg_temp_free_ptr(fpstatus);
5992                             break;
5993                         }
5994                         case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
5995                         {
5996                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5997                             gen_helper_vfp_uitos(tmp, tmp, fpstatus);
5998                             tcg_temp_free_ptr(fpstatus);
5999                             break;
6000                         }
6001                         case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
6002                         {
6003                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6004                             gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6005                             tcg_temp_free_ptr(fpstatus);
6006                             break;
6007                         }
6008                         case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
6009                         {
6010                             TCGv_ptr fpstatus = get_fpstatus_ptr(1);
6011                             gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6012                             tcg_temp_free_ptr(fpstatus);
6013                             break;
6014                         }
6015                         default:
6016                             /* Reserved op values were caught by the
6017                              * neon_2rm_sizes[] check earlier.
6018                              */
6019                             abort();
6020                         }
6021                         neon_store_reg(rd, pass, tmp);
6022                     }
6023                     break;
6024                 }
6025             } else if ((insn & (1 << 10)) == 0) {
6026                 /* VTBL, VTBX.  */
6027                 int n = ((insn >> 8) & 3) + 1;
6028                 if ((rn + n) > 32) {
6029                     /* This is UNPREDICTABLE; we choose to UNDEF to avoid the
6030                      * helper function running off the end of the register file.
6031                      */
6032                     return 1;
6033                 }
6034                 n <<= 3;
6035                 if (insn & (1 << 6)) {
6036                     tmp = neon_load_reg(rd, 0);
6037                 } else {
6038                     tmp = tcg_temp_new_i32();
6039                     tcg_gen_movi_i32(tmp, 0);
6040                 }
6041                 tmp2 = neon_load_reg(rm, 0);
6042                 ptr1 = vfp_reg_ptr(true, rn);
6043                 tmp5 = tcg_const_i32(n);
6044                 gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6045                 tcg_temp_free_i32(tmp);
6046                 if (insn & (1 << 6)) {
6047                     tmp = neon_load_reg(rd, 1);
6048                 } else {
6049                     tmp = tcg_temp_new_i32();
6050                     tcg_gen_movi_i32(tmp, 0);
6051                 }
6052                 tmp3 = neon_load_reg(rm, 1);
6053                 gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6054                 tcg_temp_free_i32(tmp5);
6055                 tcg_temp_free_ptr(ptr1);
6056                 neon_store_reg(rd, 0, tmp2);
6057                 neon_store_reg(rd, 1, tmp3);
6058                 tcg_temp_free_i32(tmp);
6059             } else if ((insn & 0x380) == 0) {
6060                 /* VDUP */
6061                 int element;
6062                 MemOp size;
6063
6064                 if ((insn & (7 << 16)) == 0 || (q && (rd & 1))) {
6065                     return 1;
6066                 }
6067                 if (insn & (1 << 16)) {
6068                     size = MO_8;
6069                     element = (insn >> 17) & 7;
6070                 } else if (insn & (1 << 17)) {
6071                     size = MO_16;
6072                     element = (insn >> 18) & 3;
6073                 } else {
6074                     size = MO_32;
6075                     element = (insn >> 19) & 1;
6076                 }
6077                 tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, 0),
6078                                      neon_element_offset(rm, element, size),
6079                                      q ? 16 : 8, q ? 16 : 8);
6080             } else {
6081                 return 1;
6082             }
6083         }
6084     }
6085     return 0;
6086 }
6087
6088 static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6089 {
6090     int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6091     const ARMCPRegInfo *ri;
6092
6093     cpnum = (insn >> 8) & 0xf;
6094
6095     /* First check for coprocessor space used for XScale/iwMMXt insns */
6096     if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < 2)) {
6097         if (extract32(s->c15_cpar, cpnum, 1) == 0) {
6098             return 1;
6099         }
6100         if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6101             return disas_iwmmxt_insn(s, insn);
6102         } else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6103             return disas_dsp_insn(s, insn);
6104         }
6105         return 1;
6106     }
6107
6108     /* Otherwise treat as a generic register access */
6109     is64 = (insn & (1 << 25)) == 0;
6110     if (!is64 && ((insn & (1 << 4)) == 0)) {
6111         /* cdp */
6112         return 1;
6113     }
6114
6115     crm = insn & 0xf;
6116     if (is64) {
6117         crn = 0;
6118         opc1 = (insn >> 4) & 0xf;
6119         opc2 = 0;
6120         rt2 = (insn >> 16) & 0xf;
6121     } else {
6122         crn = (insn >> 16) & 0xf;
6123         opc1 = (insn >> 21) & 7;
6124         opc2 = (insn >> 5) & 7;
6125         rt2 = 0;
6126     }
6127     isread = (insn >> 20) & 1;
6128     rt = (insn >> 12) & 0xf;
6129
6130     ri = get_arm_cp_reginfo(s->cp_regs,
6131             ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
6132     if (ri) {
6133         bool need_exit_tb;
6134
6135         /* Check access permissions */
6136         if (!cp_access_ok(s->current_el, ri, isread)) {
6137             return 1;
6138         }
6139
6140         if (s->hstr_active || ri->accessfn ||
6141             (arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < 14)) {
6142             /* Emit code to perform further access permissions checks at
6143              * runtime; this may result in an exception.
6144              * Note that on XScale all cp0..c13 registers do an access check
6145              * call in order to handle c15_cpar.
6146              */
6147             TCGv_ptr tmpptr;
6148             TCGv_i32 tcg_syn, tcg_isread;
6149             uint32_t syndrome;
6150
6151             /* Note that since we are an implementation which takes an
6152              * exception on a trapped conditional instruction only if the
6153              * instruction passes its condition code check, we can take
6154              * advantage of the clause in the ARM ARM that allows us to set
6155              * the COND field in the instruction to 0xE in all cases.
6156              * We could fish the actual condition out of the insn (ARM)
6157              * or the condexec bits (Thumb) but it isn't necessary.
6158              */
6159             switch (cpnum) {
6160             case 14:
6161                 if (is64) {
6162                     syndrome = syn_cp14_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6163                                                  isread, false);
6164                 } else {
6165                     syndrome = syn_cp14_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6166                                                 rt, isread, false);
6167                 }
6168                 break;
6169             case 15:
6170                 if (is64) {
6171                     syndrome = syn_cp15_rrt_trap(1, 0xe, opc1, crm, rt, rt2,
6172                                                  isread, false);
6173                 } else {
6174                     syndrome = syn_cp15_rt_trap(1, 0xe, opc1, opc2, crn, crm,
6175                                                 rt, isread, false);
6176                 }
6177                 break;
6178             default:
6179                 /* ARMv8 defines that only coprocessors 14 and 15 exist,
6180                  * so this can only happen if this is an ARMv7 or earlier CPU,
6181                  * in which case the syndrome information won't actually be
6182                  * guest visible.
6183                  */
6184                 assert(!arm_dc_feature(s, ARM_FEATURE_V8));
6185                 syndrome = syn_uncategorized();
6186                 break;
6187             }
6188
6189             gen_set_condexec(s);
6190             gen_set_pc_im(s, s->pc_curr);
6191             tmpptr = tcg_const_ptr(ri);
6192             tcg_syn = tcg_const_i32(syndrome);
6193             tcg_isread = tcg_const_i32(isread);
6194             gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
6195                                            tcg_isread);
6196             tcg_temp_free_ptr(tmpptr);
6197             tcg_temp_free_i32(tcg_syn);
6198             tcg_temp_free_i32(tcg_isread);
6199         } else if (ri->type & ARM_CP_RAISES_EXC) {
6200             /*
6201              * The readfn or writefn might raise an exception;
6202              * synchronize the CPU state in case it does.
6203              */
6204             gen_set_condexec(s);
6205             gen_set_pc_im(s, s->pc_curr);
6206         }
6207
6208         /* Handle special cases first */
6209         switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
6210         case ARM_CP_NOP:
6211             return 0;
6212         case ARM_CP_WFI:
6213             if (isread) {
6214                 return 1;
6215             }
6216             gen_set_pc_im(s, s->base.pc_next);
6217             s->base.is_jmp = DISAS_WFI;
6218             return 0;
6219         default:
6220             break;
6221         }
6222
6223         if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
6224             gen_io_start();
6225         }
6226
6227         if (isread) {
6228             /* Read */
6229             if (is64) {
6230                 TCGv_i64 tmp64;
6231                 TCGv_i32 tmp;
6232                 if (ri->type & ARM_CP_CONST) {
6233                     tmp64 = tcg_const_i64(ri->resetvalue);
6234                 } else if (ri->readfn) {
6235                     TCGv_ptr tmpptr;
6236                     tmp64 = tcg_temp_new_i64();
6237                     tmpptr = tcg_const_ptr(ri);
6238                     gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
6239                     tcg_temp_free_ptr(tmpptr);
6240                 } else {
6241                     tmp64 = tcg_temp_new_i64();
6242                     tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
6243                 }
6244                 tmp = tcg_temp_new_i32();
6245                 tcg_gen_extrl_i64_i32(tmp, tmp64);
6246                 store_reg(s, rt, tmp);
6247                 tmp = tcg_temp_new_i32();
6248                 tcg_gen_extrh_i64_i32(tmp, tmp64);
6249                 tcg_temp_free_i64(tmp64);
6250                 store_reg(s, rt2, tmp);
6251             } else {
6252                 TCGv_i32 tmp;
6253                 if (ri->type & ARM_CP_CONST) {
6254                     tmp = tcg_const_i32(ri->resetvalue);
6255                 } else if (ri->readfn) {
6256                     TCGv_ptr tmpptr;
6257                     tmp = tcg_temp_new_i32();
6258                     tmpptr = tcg_const_ptr(ri);
6259                     gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
6260                     tcg_temp_free_ptr(tmpptr);
6261                 } else {
6262                     tmp = load_cpu_offset(ri->fieldoffset);
6263                 }
6264                 if (rt == 15) {
6265                     /* Destination register of r15 for 32 bit loads sets
6266                      * the condition codes from the high 4 bits of the value
6267                      */
6268                     gen_set_nzcv(tmp);
6269                     tcg_temp_free_i32(tmp);
6270                 } else {
6271                     store_reg(s, rt, tmp);
6272                 }
6273             }
6274         } else {
6275             /* Write */
6276             if (ri->type & ARM_CP_CONST) {
6277                 /* If not forbidden by access permissions, treat as WI */
6278                 return 0;
6279             }
6280
6281             if (is64) {
6282                 TCGv_i32 tmplo, tmphi;
6283                 TCGv_i64 tmp64 = tcg_temp_new_i64();
6284                 tmplo = load_reg(s, rt);
6285                 tmphi = load_reg(s, rt2);
6286                 tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
6287                 tcg_temp_free_i32(tmplo);
6288                 tcg_temp_free_i32(tmphi);
6289                 if (ri->writefn) {
6290                     TCGv_ptr tmpptr = tcg_const_ptr(ri);
6291                     gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
6292                     tcg_temp_free_ptr(tmpptr);
6293                 } else {
6294                     tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
6295                 }
6296                 tcg_temp_free_i64(tmp64);
6297             } else {
6298                 if (ri->writefn) {
6299                     TCGv_i32 tmp;
6300                     TCGv_ptr tmpptr;
6301                     tmp = load_reg(s, rt);
6302                     tmpptr = tcg_const_ptr(ri);
6303                     gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
6304                     tcg_temp_free_ptr(tmpptr);
6305                     tcg_temp_free_i32(tmp);
6306                 } else {
6307                     TCGv_i32 tmp = load_reg(s, rt);
6308                     store_cpu_offset(tmp, ri->fieldoffset);
6309                 }
6310             }
6311         }
6312
6313         /* I/O operations must end the TB here (whether read or write) */
6314         need_exit_tb = ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) &&
6315                         (ri->type & ARM_CP_IO));
6316
6317         if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
6318             /*
6319              * A write to any coprocessor register that ends a TB
6320              * must rebuild the hflags for the next TB.
6321              */
6322             TCGv_i32 tcg_el = tcg_const_i32(s->current_el);
6323             if (arm_dc_feature(s, ARM_FEATURE_M)) {
6324                 gen_helper_rebuild_hflags_m32(cpu_env, tcg_el);
6325             } else {
6326                 if (ri->type & ARM_CP_NEWEL) {
6327                     gen_helper_rebuild_hflags_a32_newel(cpu_env);
6328                 } else {
6329                     gen_helper_rebuild_hflags_a32(cpu_env, tcg_el);
6330                 }
6331             }
6332             tcg_temp_free_i32(tcg_el);
6333             /*
6334              * We default to ending the TB on a coprocessor register write,
6335              * but allow this to be suppressed by the register definition
6336              * (usually only necessary to work around guest bugs).
6337              */
6338             need_exit_tb = true;
6339         }
6340         if (need_exit_tb) {
6341             gen_lookup_tb(s);
6342         }
6343
6344         return 0;
6345     }
6346
6347     /* Unknown register; this might be a guest error or a QEMU
6348      * unimplemented feature.
6349      */
6350     if (is64) {
6351         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
6352                       "64 bit system register cp:%d opc1: %d crm:%d "
6353                       "(%s)\n",
6354                       isread ? "read" : "write", cpnum, opc1, crm,
6355                       s->ns ? "non-secure" : "secure");
6356     } else {
6357         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
6358                       "system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
6359                       "(%s)\n",
6360                       isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
6361                       s->ns ? "non-secure" : "secure");
6362     }
6363
6364     return 1;
6365 }
6366
6367
6368 /* Store a 64-bit value to a register pair.  Clobbers val.  */
6369 static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
6370 {
6371     TCGv_i32 tmp;
6372     tmp = tcg_temp_new_i32();
6373     tcg_gen_extrl_i64_i32(tmp, val);
6374     store_reg(s, rlow, tmp);
6375     tmp = tcg_temp_new_i32();
6376     tcg_gen_extrh_i64_i32(tmp, val);
6377     store_reg(s, rhigh, tmp);
6378 }
6379
6380 /* load and add a 64-bit value from a register pair.  */
6381 static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
6382 {
6383     TCGv_i64 tmp;
6384     TCGv_i32 tmpl;
6385     TCGv_i32 tmph;
6386
6387     /* Load 64-bit value rd:rn.  */
6388     tmpl = load_reg(s, rlow);
6389     tmph = load_reg(s, rhigh);
6390     tmp = tcg_temp_new_i64();
6391     tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
6392     tcg_temp_free_i32(tmpl);
6393     tcg_temp_free_i32(tmph);
6394     tcg_gen_add_i64(val, val, tmp);
6395     tcg_temp_free_i64(tmp);
6396 }
6397
6398 /* Set N and Z flags from hi|lo.  */
6399 static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
6400 {
6401     tcg_gen_mov_i32(cpu_NF, hi);
6402     tcg_gen_or_i32(cpu_ZF, lo, hi);
6403 }
6404
6405 /* Load/Store exclusive instructions are implemented by remembering
6406    the value/address loaded, and seeing if these are the same
6407    when the store is performed.  This should be sufficient to implement
6408    the architecturally mandated semantics, and avoids having to monitor
6409    regular stores.  The compare vs the remembered value is done during
6410    the cmpxchg operation, but we must compare the addresses manually.  */
6411 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
6412                                TCGv_i32 addr, int size)
6413 {
6414     TCGv_i32 tmp = tcg_temp_new_i32();
6415     MemOp opc = size | MO_ALIGN | s->be_data;
6416
6417     s->is_ldex = true;
6418
6419     if (size == 3) {
6420         TCGv_i32 tmp2 = tcg_temp_new_i32();
6421         TCGv_i64 t64 = tcg_temp_new_i64();
6422
6423         /* For AArch32, architecturally the 32-bit word at the lowest
6424          * address is always Rt and the one at addr+4 is Rt2, even if
6425          * the CPU is big-endian. That means we don't want to do a
6426          * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
6427          * for an architecturally 64-bit access, but instead do a
6428          * 64-bit access using MO_BE if appropriate and then split
6429          * the two halves.
6430          * This only makes a difference for BE32 user-mode, where
6431          * frob64() must not flip the two halves of the 64-bit data
6432          * but this code must treat BE32 user-mode like BE32 system.
6433          */
6434         TCGv taddr = gen_aa32_addr(s, addr, opc);
6435
6436         tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
6437         tcg_temp_free(taddr);
6438         tcg_gen_mov_i64(cpu_exclusive_val, t64);
6439         if (s->be_data == MO_BE) {
6440             tcg_gen_extr_i64_i32(tmp2, tmp, t64);
6441         } else {
6442             tcg_gen_extr_i64_i32(tmp, tmp2, t64);
6443         }
6444         tcg_temp_free_i64(t64);
6445
6446         store_reg(s, rt2, tmp2);
6447     } else {
6448         gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
6449         tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
6450     }
6451
6452     store_reg(s, rt, tmp);
6453     tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
6454 }
6455
6456 static void gen_clrex(DisasContext *s)
6457 {
6458     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
6459 }
6460
6461 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
6462                                 TCGv_i32 addr, int size)
6463 {
6464     TCGv_i32 t0, t1, t2;
6465     TCGv_i64 extaddr;
6466     TCGv taddr;
6467     TCGLabel *done_label;
6468     TCGLabel *fail_label;
6469     MemOp opc = size | MO_ALIGN | s->be_data;
6470
6471     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {
6472          [addr] = {Rt};
6473          {Rd} = 0;
6474        } else {
6475          {Rd} = 1;
6476        } */
6477     fail_label = gen_new_label();
6478     done_label = gen_new_label();
6479     extaddr = tcg_temp_new_i64();
6480     tcg_gen_extu_i32_i64(extaddr, addr);
6481     tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
6482     tcg_temp_free_i64(extaddr);
6483
6484     taddr = gen_aa32_addr(s, addr, opc);
6485     t0 = tcg_temp_new_i32();
6486     t1 = load_reg(s, rt);
6487     if (size == 3) {
6488         TCGv_i64 o64 = tcg_temp_new_i64();
6489         TCGv_i64 n64 = tcg_temp_new_i64();
6490
6491         t2 = load_reg(s, rt2);
6492         /* For AArch32, architecturally the 32-bit word at the lowest
6493          * address is always Rt and the one at addr+4 is Rt2, even if
6494          * the CPU is big-endian. Since we're going to treat this as a
6495          * single 64-bit BE store, we need to put the two halves in the
6496          * opposite order for BE to LE, so that they end up in the right
6497          * places.
6498          * We don't want gen_aa32_frob64() because that does the wrong
6499          * thing for BE32 usermode.
6500          */
6501         if (s->be_data == MO_BE) {
6502             tcg_gen_concat_i32_i64(n64, t2, t1);
6503         } else {
6504             tcg_gen_concat_i32_i64(n64, t1, t2);
6505         }
6506         tcg_temp_free_i32(t2);
6507
6508         tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
6509                                    get_mem_index(s), opc);
6510         tcg_temp_free_i64(n64);
6511
6512         tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
6513         tcg_gen_extrl_i64_i32(t0, o64);
6514
6515         tcg_temp_free_i64(o64);
6516     } else {
6517         t2 = tcg_temp_new_i32();
6518         tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
6519         tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
6520         tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
6521         tcg_temp_free_i32(t2);
6522     }
6523     tcg_temp_free_i32(t1);
6524     tcg_temp_free(taddr);
6525     tcg_gen_mov_i32(cpu_R[rd], t0);
6526     tcg_temp_free_i32(t0);
6527     tcg_gen_br(done_label);
6528
6529     gen_set_label(fail_label);
6530     tcg_gen_movi_i32(cpu_R[rd], 1);
6531     gen_set_label(done_label);
6532     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
6533 }
6534
6535 /* gen_srs:
6536  * @env: CPUARMState
6537  * @s: DisasContext
6538  * @mode: mode field from insn (which stack to store to)
6539  * @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
6540  * @writeback: true if writeback bit set
6541  *
6542  * Generate code for the SRS (Store Return State) insn.
6543  */
6544 static void gen_srs(DisasContext *s,
6545                     uint32_t mode, uint32_t amode, bool writeback)
6546 {
6547     int32_t offset;
6548     TCGv_i32 addr, tmp;
6549     bool undef = false;
6550
6551     /* SRS is:
6552      * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
6553      *   and specified mode is monitor mode
6554      * - UNDEFINED in Hyp mode
6555      * - UNPREDICTABLE in User or System mode
6556      * - UNPREDICTABLE if the specified mode is:
6557      * -- not implemented
6558      * -- not a valid mode number
6559      * -- a mode that's at a higher exception level
6560      * -- Monitor, if we are Non-secure
6561      * For the UNPREDICTABLE cases we choose to UNDEF.
6562      */
6563     if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
6564         gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), 3);
6565         return;
6566     }
6567
6568     if (s->current_el == 0 || s->current_el == 2) {
6569         undef = true;
6570     }
6571
6572     switch (mode) {
6573     case ARM_CPU_MODE_USR:
6574     case ARM_CPU_MODE_FIQ:
6575     case ARM_CPU_MODE_IRQ:
6576     case ARM_CPU_MODE_SVC:
6577     case ARM_CPU_MODE_ABT:
6578     case ARM_CPU_MODE_UND:
6579     case ARM_CPU_MODE_SYS:
6580         break;
6581     case ARM_CPU_MODE_HYP:
6582         if (s->current_el == 1 || !arm_dc_feature(s, ARM_FEATURE_EL2)) {
6583             undef = true;
6584         }
6585         break;
6586     case ARM_CPU_MODE_MON:
6587         /* No need to check specifically for "are we non-secure" because
6588          * we've already made EL0 UNDEF and handled the trap for S-EL1;
6589          * so if this isn't EL3 then we must be non-secure.
6590          */
6591         if (s->current_el != 3) {
6592             undef = true;
6593         }
6594         break;
6595     default:
6596         undef = true;
6597     }
6598
6599     if (undef) {
6600         unallocated_encoding(s);
6601         return;
6602     }
6603
6604     addr = tcg_temp_new_i32();
6605     tmp = tcg_const_i32(mode);
6606     /* get_r13_banked() will raise an exception if called from System mode */
6607     gen_set_condexec(s);
6608     gen_set_pc_im(s, s->pc_curr);
6609     gen_helper_get_r13_banked(addr, cpu_env, tmp);
6610     tcg_temp_free_i32(tmp);
6611     switch (amode) {
6612     case 0: /* DA */
6613         offset = -4;
6614         break;
6615     case 1: /* IA */
6616         offset = 0;
6617         break;
6618     case 2: /* DB */
6619         offset = -8;
6620         break;
6621     case 3: /* IB */
6622         offset = 4;
6623         break;
6624     default:
6625         abort();
6626     }
6627     tcg_gen_addi_i32(addr, addr, offset);
6628     tmp = load_reg(s, 14);
6629     gen_aa32_st32(s, tmp, addr, get_mem_index(s));
6630     tcg_temp_free_i32(tmp);
6631     tmp = load_cpu_field(spsr);
6632     tcg_gen_addi_i32(addr, addr, 4);
6633     gen_aa32_st32(s, tmp, addr, get_mem_index(s));
6634     tcg_temp_free_i32(tmp);
6635     if (writeback) {
6636         switch (amode) {
6637         case 0:
6638             offset = -8;
6639             break;
6640         case 1:
6641             offset = 4;
6642             break;
6643         case 2:
6644             offset = -4;
6645             break;
6646         case 3:
6647             offset = 0;
6648             break;
6649         default:
6650             abort();
6651         }
6652         tcg_gen_addi_i32(addr, addr, offset);
6653         tmp = tcg_const_i32(mode);
6654         gen_helper_set_r13_banked(cpu_env, tmp, addr);
6655         tcg_temp_free_i32(tmp);
6656     }
6657     tcg_temp_free_i32(addr);
6658     s->base.is_jmp = DISAS_UPDATE;
6659 }
6660
6661 /* Generate a label used for skipping this instruction */
6662 static void arm_gen_condlabel(DisasContext *s)
6663 {
6664     if (!s->condjmp) {
6665         s->condlabel = gen_new_label();
6666         s->condjmp = 1;
6667     }
6668 }
6669
6670 /* Skip this instruction if the ARM condition is false */
6671 static void arm_skip_unless(DisasContext *s, uint32_t cond)
6672 {
6673     arm_gen_condlabel(s);
6674     arm_gen_test_cc(cond ^ 1, s->condlabel);
6675 }
6676
6677
6678 /*
6679  * Constant expanders for the decoders.
6680  */
6681
6682 static int negate(DisasContext *s, int x)
6683 {
6684     return -x;
6685 }
6686
6687 static int plus_2(DisasContext *s, int x)
6688 {
6689     return x + 2;
6690 }
6691
6692 static int times_2(DisasContext *s, int x)
6693 {
6694     return x * 2;
6695 }
6696
6697 static int times_4(DisasContext *s, int x)
6698 {
6699     return x * 4;
6700 }
6701
6702 /* Return only the rotation part of T32ExpandImm.  */
6703 static int t32_expandimm_rot(DisasContext *s, int x)
6704 {
6705     return x & 0xc00 ? extract32(x, 7, 5) : 0;
6706 }
6707
6708 /* Return the unrotated immediate from T32ExpandImm.  */
6709 static int t32_expandimm_imm(DisasContext *s, int x)
6710 {
6711     int imm = extract32(x, 0, 8);
6712
6713     switch (extract32(x, 8, 4)) {
6714     case 0: /* XY */
6715         /* Nothing to do.  */
6716         break;
6717     case 1: /* 00XY00XY */
6718         imm *= 0x00010001;
6719         break;
6720     case 2: /* XY00XY00 */
6721         imm *= 0x01000100;
6722         break;
6723     case 3: /* XYXYXYXY */
6724         imm *= 0x01010101;
6725         break;
6726     default:
6727         /* Rotated constant.  */
6728         imm |= 0x80;
6729         break;
6730     }
6731     return imm;
6732 }
6733
6734 static int t32_branch24(DisasContext *s, int x)
6735 {
6736     /* Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S.  */
6737     x ^= !(x < 0) * (3 << 21);
6738     /* Append the final zero.  */
6739     return x << 1;
6740 }
6741
6742 static int t16_setflags(DisasContext *s)
6743 {
6744     return s->condexec_mask == 0;
6745 }
6746
6747 static int t16_push_list(DisasContext *s, int x)
6748 {
6749     return (x & 0xff) | (x & 0x100) << (14 - 8);
6750 }
6751
6752 static int t16_pop_list(DisasContext *s, int x)
6753 {
6754     return (x & 0xff) | (x & 0x100) << (15 - 8);
6755 }
6756
6757 /*
6758  * Include the generated decoders.
6759  */
6760
6761 #include "decode-a32.inc.c"
6762 #include "decode-a32-uncond.inc.c"
6763 #include "decode-t32.inc.c"
6764 #include "decode-t16.inc.c"
6765
6766 /* Helpers to swap operands for reverse-subtract.  */
6767 static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
6768 {
6769     tcg_gen_sub_i32(dst, b, a);
6770 }
6771
6772 static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
6773 {
6774     gen_sub_CC(dst, b, a);
6775 }
6776
6777 static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
6778 {
6779     gen_sub_carry(dest, b, a);
6780 }
6781
6782 static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
6783 {
6784     gen_sbc_CC(dest, b, a);
6785 }
6786
6787 /*
6788  * Helpers for the data processing routines.
6789  *
6790  * After the computation store the results back.
6791  * This may be suppressed altogether (STREG_NONE), require a runtime
6792  * check against the stack limits (STREG_SP_CHECK), or generate an
6793  * exception return.  Oh, or store into a register.
6794  *
6795  * Always return true, indicating success for a trans_* function.
6796  */
6797 typedef enum {
6798    STREG_NONE,
6799    STREG_NORMAL,
6800    STREG_SP_CHECK,
6801    STREG_EXC_RET,
6802 } StoreRegKind;
6803
6804 static bool store_reg_kind(DisasContext *s, int rd,
6805                             TCGv_i32 val, StoreRegKind kind)
6806 {
6807     switch (kind) {
6808     case STREG_NONE:
6809         tcg_temp_free_i32(val);
6810         return true;
6811     case STREG_NORMAL:
6812         /* See ALUWritePC: Interworking only from a32 mode. */
6813         if (s->thumb) {
6814             store_reg(s, rd, val);
6815         } else {
6816             store_reg_bx(s, rd, val);
6817         }
6818         return true;
6819     case STREG_SP_CHECK:
6820         store_sp_checked(s, val);
6821         return true;
6822     case STREG_EXC_RET:
6823         gen_exception_return(s, val);
6824         return true;
6825     }
6826     g_assert_not_reached();
6827 }
6828
6829 /*
6830  * Data Processing (register)
6831  *
6832  * Operate, with set flags, one register source,
6833  * one immediate shifted register source, and a destination.
6834  */
6835 static bool op_s_rrr_shi(DisasContext *s, arg_s_rrr_shi *a,
6836                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6837                          int logic_cc, StoreRegKind kind)
6838 {
6839     TCGv_i32 tmp1, tmp2;
6840
6841     tmp2 = load_reg(s, a->rm);
6842     gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
6843     tmp1 = load_reg(s, a->rn);
6844
6845     gen(tmp1, tmp1, tmp2);
6846     tcg_temp_free_i32(tmp2);
6847
6848     if (logic_cc) {
6849         gen_logic_CC(tmp1);
6850     }
6851     return store_reg_kind(s, a->rd, tmp1, kind);
6852 }
6853
6854 static bool op_s_rxr_shi(DisasContext *s, arg_s_rrr_shi *a,
6855                          void (*gen)(TCGv_i32, TCGv_i32),
6856                          int logic_cc, StoreRegKind kind)
6857 {
6858     TCGv_i32 tmp;
6859
6860     tmp = load_reg(s, a->rm);
6861     gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
6862
6863     gen(tmp, tmp);
6864     if (logic_cc) {
6865         gen_logic_CC(tmp);
6866     }
6867     return store_reg_kind(s, a->rd, tmp, kind);
6868 }
6869
6870 /*
6871  * Data-processing (register-shifted register)
6872  *
6873  * Operate, with set flags, one register source,
6874  * one register shifted register source, and a destination.
6875  */
6876 static bool op_s_rrr_shr(DisasContext *s, arg_s_rrr_shr *a,
6877                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6878                          int logic_cc, StoreRegKind kind)
6879 {
6880     TCGv_i32 tmp1, tmp2;
6881
6882     tmp1 = load_reg(s, a->rs);
6883     tmp2 = load_reg(s, a->rm);
6884     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
6885     tmp1 = load_reg(s, a->rn);
6886
6887     gen(tmp1, tmp1, tmp2);
6888     tcg_temp_free_i32(tmp2);
6889
6890     if (logic_cc) {
6891         gen_logic_CC(tmp1);
6892     }
6893     return store_reg_kind(s, a->rd, tmp1, kind);
6894 }
6895
6896 static bool op_s_rxr_shr(DisasContext *s, arg_s_rrr_shr *a,
6897                          void (*gen)(TCGv_i32, TCGv_i32),
6898                          int logic_cc, StoreRegKind kind)
6899 {
6900     TCGv_i32 tmp1, tmp2;
6901
6902     tmp1 = load_reg(s, a->rs);
6903     tmp2 = load_reg(s, a->rm);
6904     gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
6905
6906     gen(tmp2, tmp2);
6907     if (logic_cc) {
6908         gen_logic_CC(tmp2);
6909     }
6910     return store_reg_kind(s, a->rd, tmp2, kind);
6911 }
6912
6913 /*
6914  * Data-processing (immediate)
6915  *
6916  * Operate, with set flags, one register source,
6917  * one rotated immediate, and a destination.
6918  *
6919  * Note that logic_cc && a->rot setting CF based on the msb of the
6920  * immediate is the reason why we must pass in the unrotated form
6921  * of the immediate.
6922  */
6923 static bool op_s_rri_rot(DisasContext *s, arg_s_rri_rot *a,
6924                          void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
6925                          int logic_cc, StoreRegKind kind)
6926 {
6927     TCGv_i32 tmp1, tmp2;
6928     uint32_t imm;
6929
6930     imm = ror32(a->imm, a->rot);
6931     if (logic_cc && a->rot) {
6932         tcg_gen_movi_i32(cpu_CF, imm >> 31);
6933     }
6934     tmp2 = tcg_const_i32(imm);
6935     tmp1 = load_reg(s, a->rn);
6936
6937     gen(tmp1, tmp1, tmp2);
6938     tcg_temp_free_i32(tmp2);
6939
6940     if (logic_cc) {
6941         gen_logic_CC(tmp1);
6942     }
6943     return store_reg_kind(s, a->rd, tmp1, kind);
6944 }
6945
6946 static bool op_s_rxi_rot(DisasContext *s, arg_s_rri_rot *a,
6947                          void (*gen)(TCGv_i32, TCGv_i32),
6948                          int logic_cc, StoreRegKind kind)
6949 {
6950     TCGv_i32 tmp;
6951     uint32_t imm;
6952
6953     imm = ror32(a->imm, a->rot);
6954     if (logic_cc && a->rot) {
6955         tcg_gen_movi_i32(cpu_CF, imm >> 31);
6956     }
6957     tmp = tcg_const_i32(imm);
6958
6959     gen(tmp, tmp);
6960     if (logic_cc) {
6961         gen_logic_CC(tmp);
6962     }
6963     return store_reg_kind(s, a->rd, tmp, kind);
6964 }
6965
6966 #define DO_ANY3(NAME, OP, L, K)                                         \
6967     static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
6968     { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
6969     static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
6970     { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
6971     static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
6972     { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
6973
6974 #define DO_ANY2(NAME, OP, L, K)                                         \
6975     static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
6976     { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
6977     static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
6978     { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
6979     static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
6980     { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
6981
6982 #define DO_CMP2(NAME, OP, L)                                            \
6983     static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
6984     { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
6985     static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
6986     { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
6987     static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
6988     { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
6989
6990 DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
6991 DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
6992 DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
6993 DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
6994
6995 DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
6996 DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
6997 DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
6998 DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
6999
7000 DO_CMP2(TST, tcg_gen_and_i32, true)
7001 DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7002 DO_CMP2(CMN, gen_add_CC, false)
7003 DO_CMP2(CMP, gen_sub_CC, false)
7004
7005 DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7006         a->rd == 13 && a->rn == 13 ? STREG_SP_CHECK : STREG_NORMAL)
7007
7008 /*
7009  * Note for the computation of StoreRegKind we return out of the
7010  * middle of the functions that are expanded by DO_ANY3, and that
7011  * we modify a->s via that parameter before it is used by OP.
7012  */
7013 DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7014         ({
7015             StoreRegKind ret = STREG_NORMAL;
7016             if (a->rd == 15 && a->s) {
7017                 /*
7018                  * See ALUExceptionReturn:
7019                  * In User mode, UNPREDICTABLE; we choose UNDEF.
7020                  * In Hyp mode, UNDEFINED.
7021                  */
7022                 if (IS_USER(s) || s->current_el == 2) {
7023                     unallocated_encoding(s);
7024                     return true;
7025                 }
7026                 /* There is no writeback of nzcv to PSTATE.  */
7027                 a->s = 0;
7028                 ret = STREG_EXC_RET;
7029             } else if (a->rd == 13 && a->rn == 13) {
7030                 ret = STREG_SP_CHECK;
7031             }
7032             ret;
7033         }))
7034
7035 DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7036         ({
7037             StoreRegKind ret = STREG_NORMAL;
7038             if (a->rd == 15 && a->s) {
7039                 /*
7040                  * See ALUExceptionReturn:
7041                  * In User mode, UNPREDICTABLE; we choose UNDEF.
7042                  * In Hyp mode, UNDEFINED.
7043                  */
7044                 if (IS_USER(s) || s->current_el == 2) {
7045                     unallocated_encoding(s);
7046                     return true;
7047                 }
7048                 /* There is no writeback of nzcv to PSTATE.  */
7049                 a->s = 0;
7050                 ret = STREG_EXC_RET;
7051             } else if (a->rd == 13) {
7052                 ret = STREG_SP_CHECK;
7053             }
7054             ret;
7055         }))
7056
7057 DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7058
7059 /*
7060  * ORN is only available with T32, so there is no register-shifted-register
7061  * form of the insn.  Using the DO_ANY3 macro would create an unused function.
7062  */
7063 static bool trans_ORN_rrri(DisasContext *s, arg_s_rrr_shi *a)
7064 {
7065     return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7066 }
7067
7068 static bool trans_ORN_rri(DisasContext *s, arg_s_rri_rot *a)
7069 {
7070     return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7071 }
7072
7073 #undef DO_ANY3
7074 #undef DO_ANY2
7075 #undef DO_CMP2
7076
7077 static bool trans_ADR(DisasContext *s, arg_ri *a)
7078 {
7079     store_reg_bx(s, a->rd, add_reg_for_lit(s, 15, a->imm));
7080     return true;
7081 }
7082
7083 static bool trans_MOVW(DisasContext *s, arg_MOVW *a)
7084 {
7085     TCGv_i32 tmp;
7086
7087     if (!ENABLE_ARCH_6T2) {
7088         return false;
7089     }
7090
7091     tmp = tcg_const_i32(a->imm);
7092     store_reg(s, a->rd, tmp);
7093     return true;
7094 }
7095
7096 static bool trans_MOVT(DisasContext *s, arg_MOVW *a)
7097 {
7098     TCGv_i32 tmp;
7099
7100     if (!ENABLE_ARCH_6T2) {
7101         return false;
7102     }
7103
7104     tmp = load_reg(s, a->rd);
7105     tcg_gen_ext16u_i32(tmp, tmp);
7106     tcg_gen_ori_i32(tmp, tmp, a->imm << 16);
7107     store_reg(s, a->rd, tmp);
7108     return true;
7109 }
7110
7111 /*
7112  * Multiply and multiply accumulate
7113  */
7114
7115 static bool op_mla(DisasContext *s, arg_s_rrrr *a, bool add)
7116 {
7117     TCGv_i32 t1, t2;
7118
7119     t1 = load_reg(s, a->rn);
7120     t2 = load_reg(s, a->rm);
7121     tcg_gen_mul_i32(t1, t1, t2);
7122     tcg_temp_free_i32(t2);
7123     if (add) {
7124         t2 = load_reg(s, a->ra);
7125         tcg_gen_add_i32(t1, t1, t2);
7126         tcg_temp_free_i32(t2);
7127     }
7128     if (a->s) {
7129         gen_logic_CC(t1);
7130     }
7131     store_reg(s, a->rd, t1);
7132     return true;
7133 }
7134
7135 static bool trans_MUL(DisasContext *s, arg_MUL *a)
7136 {
7137     return op_mla(s, a, false);
7138 }
7139
7140 static bool trans_MLA(DisasContext *s, arg_MLA *a)
7141 {
7142     return op_mla(s, a, true);
7143 }
7144
7145 static bool trans_MLS(DisasContext *s, arg_MLS *a)
7146 {
7147     TCGv_i32 t1, t2;
7148
7149     if (!ENABLE_ARCH_6T2) {
7150         return false;
7151     }
7152     t1 = load_reg(s, a->rn);
7153     t2 = load_reg(s, a->rm);
7154     tcg_gen_mul_i32(t1, t1, t2);
7155     tcg_temp_free_i32(t2);
7156     t2 = load_reg(s, a->ra);
7157     tcg_gen_sub_i32(t1, t2, t1);
7158     tcg_temp_free_i32(t2);
7159     store_reg(s, a->rd, t1);
7160     return true;
7161 }
7162
7163 static bool op_mlal(DisasContext *s, arg_s_rrrr *a, bool uns, bool add)
7164 {
7165     TCGv_i32 t0, t1, t2, t3;
7166
7167     t0 = load_reg(s, a->rm);
7168     t1 = load_reg(s, a->rn);
7169     if (uns) {
7170         tcg_gen_mulu2_i32(t0, t1, t0, t1);
7171     } else {
7172         tcg_gen_muls2_i32(t0, t1, t0, t1);
7173     }
7174     if (add) {
7175         t2 = load_reg(s, a->ra);
7176         t3 = load_reg(s, a->rd);
7177         tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
7178         tcg_temp_free_i32(t2);
7179         tcg_temp_free_i32(t3);
7180     }
7181     if (a->s) {
7182         gen_logicq_cc(t0, t1);
7183     }
7184     store_reg(s, a->ra, t0);
7185     store_reg(s, a->rd, t1);
7186     return true;
7187 }
7188
7189 static bool trans_UMULL(DisasContext *s, arg_UMULL *a)
7190 {
7191     return op_mlal(s, a, true, false);
7192 }
7193
7194 static bool trans_SMULL(DisasContext *s, arg_SMULL *a)
7195 {
7196     return op_mlal(s, a, false, false);
7197 }
7198
7199 static bool trans_UMLAL(DisasContext *s, arg_UMLAL *a)
7200 {
7201     return op_mlal(s, a, true, true);
7202 }
7203
7204 static bool trans_SMLAL(DisasContext *s, arg_SMLAL *a)
7205 {
7206     return op_mlal(s, a, false, true);
7207 }
7208
7209 static bool trans_UMAAL(DisasContext *s, arg_UMAAL *a)
7210 {
7211     TCGv_i32 t0, t1, t2, zero;
7212
7213     if (s->thumb
7214         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7215         : !ENABLE_ARCH_6) {
7216         return false;
7217     }
7218
7219     t0 = load_reg(s, a->rm);
7220     t1 = load_reg(s, a->rn);
7221     tcg_gen_mulu2_i32(t0, t1, t0, t1);
7222     zero = tcg_const_i32(0);
7223     t2 = load_reg(s, a->ra);
7224     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7225     tcg_temp_free_i32(t2);
7226     t2 = load_reg(s, a->rd);
7227     tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7228     tcg_temp_free_i32(t2);
7229     tcg_temp_free_i32(zero);
7230     store_reg(s, a->ra, t0);
7231     store_reg(s, a->rd, t1);
7232     return true;
7233 }
7234
7235 /*
7236  * Saturating addition and subtraction
7237  */
7238
7239 static bool op_qaddsub(DisasContext *s, arg_rrr *a, bool add, bool doub)
7240 {
7241     TCGv_i32 t0, t1;
7242
7243     if (s->thumb
7244         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7245         : !ENABLE_ARCH_5TE) {
7246         return false;
7247     }
7248
7249     t0 = load_reg(s, a->rm);
7250     t1 = load_reg(s, a->rn);
7251     if (doub) {
7252         gen_helper_add_saturate(t1, cpu_env, t1, t1);
7253     }
7254     if (add) {
7255         gen_helper_add_saturate(t0, cpu_env, t0, t1);
7256     } else {
7257         gen_helper_sub_saturate(t0, cpu_env, t0, t1);
7258     }
7259     tcg_temp_free_i32(t1);
7260     store_reg(s, a->rd, t0);
7261     return true;
7262 }
7263
7264 #define DO_QADDSUB(NAME, ADD, DOUB) \
7265 static bool trans_##NAME(DisasContext *s, arg_rrr *a)    \
7266 {                                                        \
7267     return op_qaddsub(s, a, ADD, DOUB);                  \
7268 }
7269
7270 DO_QADDSUB(QADD, true, false)
7271 DO_QADDSUB(QSUB, false, false)
7272 DO_QADDSUB(QDADD, true, true)
7273 DO_QADDSUB(QDSUB, false, true)
7274
7275 #undef DO_QADDSUB
7276
7277 /*
7278  * Halfword multiply and multiply accumulate
7279  */
7280
7281 static bool op_smlaxxx(DisasContext *s, arg_rrrr *a,
7282                        int add_long, bool nt, bool mt)
7283 {
7284     TCGv_i32 t0, t1, tl, th;
7285
7286     if (s->thumb
7287         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7288         : !ENABLE_ARCH_5TE) {
7289         return false;
7290     }
7291
7292     t0 = load_reg(s, a->rn);
7293     t1 = load_reg(s, a->rm);
7294     gen_mulxy(t0, t1, nt, mt);
7295     tcg_temp_free_i32(t1);
7296
7297     switch (add_long) {
7298     case 0:
7299         store_reg(s, a->rd, t0);
7300         break;
7301     case 1:
7302         t1 = load_reg(s, a->ra);
7303         gen_helper_add_setq(t0, cpu_env, t0, t1);
7304         tcg_temp_free_i32(t1);
7305         store_reg(s, a->rd, t0);
7306         break;
7307     case 2:
7308         tl = load_reg(s, a->ra);
7309         th = load_reg(s, a->rd);
7310         /* Sign-extend the 32-bit product to 64 bits.  */
7311         t1 = tcg_temp_new_i32();
7312         tcg_gen_sari_i32(t1, t0, 31);
7313         tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
7314         tcg_temp_free_i32(t0);
7315         tcg_temp_free_i32(t1);
7316         store_reg(s, a->ra, tl);
7317         store_reg(s, a->rd, th);
7318         break;
7319     default:
7320         g_assert_not_reached();
7321     }
7322     return true;
7323 }
7324
7325 #define DO_SMLAX(NAME, add, nt, mt) \
7326 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
7327 {                                                          \
7328     return op_smlaxxx(s, a, add, nt, mt);                  \
7329 }
7330
7331 DO_SMLAX(SMULBB, 0, 0, 0)
7332 DO_SMLAX(SMULBT, 0, 0, 1)
7333 DO_SMLAX(SMULTB, 0, 1, 0)
7334 DO_SMLAX(SMULTT, 0, 1, 1)
7335
7336 DO_SMLAX(SMLABB, 1, 0, 0)
7337 DO_SMLAX(SMLABT, 1, 0, 1)
7338 DO_SMLAX(SMLATB, 1, 1, 0)
7339 DO_SMLAX(SMLATT, 1, 1, 1)
7340
7341 DO_SMLAX(SMLALBB, 2, 0, 0)
7342 DO_SMLAX(SMLALBT, 2, 0, 1)
7343 DO_SMLAX(SMLALTB, 2, 1, 0)
7344 DO_SMLAX(SMLALTT, 2, 1, 1)
7345
7346 #undef DO_SMLAX
7347
7348 static bool op_smlawx(DisasContext *s, arg_rrrr *a, bool add, bool mt)
7349 {
7350     TCGv_i32 t0, t1;
7351
7352     if (!ENABLE_ARCH_5TE) {
7353         return false;
7354     }
7355
7356     t0 = load_reg(s, a->rn);
7357     t1 = load_reg(s, a->rm);
7358     /*
7359      * Since the nominal result is product<47:16>, shift the 16-bit
7360      * input up by 16 bits, so that the result is at product<63:32>.
7361      */
7362     if (mt) {
7363         tcg_gen_andi_i32(t1, t1, 0xffff0000);
7364     } else {
7365         tcg_gen_shli_i32(t1, t1, 16);
7366     }
7367     tcg_gen_muls2_i32(t0, t1, t0, t1);
7368     tcg_temp_free_i32(t0);
7369     if (add) {
7370         t0 = load_reg(s, a->ra);
7371         gen_helper_add_setq(t1, cpu_env, t1, t0);
7372         tcg_temp_free_i32(t0);
7373     }
7374     store_reg(s, a->rd, t1);
7375     return true;
7376 }
7377
7378 #define DO_SMLAWX(NAME, add, mt) \
7379 static bool trans_##NAME(DisasContext *s, arg_rrrr *a)     \
7380 {                                                          \
7381     return op_smlawx(s, a, add, mt);                       \
7382 }
7383
7384 DO_SMLAWX(SMULWB, 0, 0)
7385 DO_SMLAWX(SMULWT, 0, 1)
7386 DO_SMLAWX(SMLAWB, 1, 0)
7387 DO_SMLAWX(SMLAWT, 1, 1)
7388
7389 #undef DO_SMLAWX
7390
7391 /*
7392  * MSR (immediate) and hints
7393  */
7394
7395 static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
7396 {
7397     /*
7398      * When running single-threaded TCG code, use the helper to ensure that
7399      * the next round-robin scheduled vCPU gets a crack.  When running in
7400      * MTTCG we don't generate jumps to the helper as it won't affect the
7401      * scheduling of other vCPUs.
7402      */
7403     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
7404         gen_set_pc_im(s, s->base.pc_next);
7405         s->base.is_jmp = DISAS_YIELD;
7406     }
7407     return true;
7408 }
7409
7410 static bool trans_WFE(DisasContext *s, arg_WFE *a)
7411 {
7412     /*
7413      * When running single-threaded TCG code, use the helper to ensure that
7414      * the next round-robin scheduled vCPU gets a crack.  In MTTCG mode we
7415      * just skip this instruction.  Currently the SEV/SEVL instructions,
7416      * which are *one* of many ways to wake the CPU from WFE, are not
7417      * implemented so we can't sleep like WFI does.
7418      */
7419     if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
7420         gen_set_pc_im(s, s->base.pc_next);
7421         s->base.is_jmp = DISAS_WFE;
7422     }
7423     return true;
7424 }
7425
7426 static bool trans_WFI(DisasContext *s, arg_WFI *a)
7427 {
7428     /* For WFI, halt the vCPU until an IRQ. */
7429     gen_set_pc_im(s, s->base.pc_next);
7430     s->base.is_jmp = DISAS_WFI;
7431     return true;
7432 }
7433
7434 static bool trans_NOP(DisasContext *s, arg_NOP *a)
7435 {
7436     return true;
7437 }
7438
7439 static bool trans_MSR_imm(DisasContext *s, arg_MSR_imm *a)
7440 {
7441     uint32_t val = ror32(a->imm, a->rot * 2);
7442     uint32_t mask = msr_mask(s, a->mask, a->r);
7443
7444     if (gen_set_psr_im(s, mask, a->r, val)) {
7445         unallocated_encoding(s);
7446     }
7447     return true;
7448 }
7449
7450 /*
7451  * Cyclic Redundancy Check
7452  */
7453
7454 static bool op_crc32(DisasContext *s, arg_rrr *a, bool c, MemOp sz)
7455 {
7456     TCGv_i32 t1, t2, t3;
7457
7458     if (!dc_isar_feature(aa32_crc32, s)) {
7459         return false;
7460     }
7461
7462     t1 = load_reg(s, a->rn);
7463     t2 = load_reg(s, a->rm);
7464     switch (sz) {
7465     case MO_8:
7466         gen_uxtb(t2);
7467         break;
7468     case MO_16:
7469         gen_uxth(t2);
7470         break;
7471     case MO_32:
7472         break;
7473     default:
7474         g_assert_not_reached();
7475     }
7476     t3 = tcg_const_i32(1 << sz);
7477     if (c) {
7478         gen_helper_crc32c(t1, t1, t2, t3);
7479     } else {
7480         gen_helper_crc32(t1, t1, t2, t3);
7481     }
7482     tcg_temp_free_i32(t2);
7483     tcg_temp_free_i32(t3);
7484     store_reg(s, a->rd, t1);
7485     return true;
7486 }
7487
7488 #define DO_CRC32(NAME, c, sz) \
7489 static bool trans_##NAME(DisasContext *s, arg_rrr *a)  \
7490     { return op_crc32(s, a, c, sz); }
7491
7492 DO_CRC32(CRC32B, false, MO_8)
7493 DO_CRC32(CRC32H, false, MO_16)
7494 DO_CRC32(CRC32W, false, MO_32)
7495 DO_CRC32(CRC32CB, true, MO_8)
7496 DO_CRC32(CRC32CH, true, MO_16)
7497 DO_CRC32(CRC32CW, true, MO_32)
7498
7499 #undef DO_CRC32
7500
7501 /*
7502  * Miscellaneous instructions
7503  */
7504
7505 static bool trans_MRS_bank(DisasContext *s, arg_MRS_bank *a)
7506 {
7507     if (arm_dc_feature(s, ARM_FEATURE_M)) {
7508         return false;
7509     }
7510     gen_mrs_banked(s, a->r, a->sysm, a->rd);
7511     return true;
7512 }
7513
7514 static bool trans_MSR_bank(DisasContext *s, arg_MSR_bank *a)
7515 {
7516     if (arm_dc_feature(s, ARM_FEATURE_M)) {
7517         return false;
7518     }
7519     gen_msr_banked(s, a->r, a->sysm, a->rn);
7520     return true;
7521 }
7522
7523 static bool trans_MRS_reg(DisasContext *s, arg_MRS_reg *a)
7524 {
7525     TCGv_i32 tmp;
7526
7527     if (arm_dc_feature(s, ARM_FEATURE_M)) {
7528         return false;
7529     }
7530     if (a->r) {
7531         if (IS_USER(s)) {
7532             unallocated_encoding(s);
7533             return true;
7534         }
7535         tmp = load_cpu_field(spsr);
7536     } else {
7537         tmp = tcg_temp_new_i32();
7538         gen_helper_cpsr_read(tmp, cpu_env);
7539     }
7540     store_reg(s, a->rd, tmp);
7541     return true;
7542 }
7543
7544 static bool trans_MSR_reg(DisasContext *s, arg_MSR_reg *a)
7545 {
7546     TCGv_i32 tmp;
7547     uint32_t mask = msr_mask(s, a->mask, a->r);
7548
7549     if (arm_dc_feature(s, ARM_FEATURE_M)) {
7550         return false;
7551     }
7552     tmp = load_reg(s, a->rn);
7553     if (gen_set_psr(s, mask, a->r, tmp)) {
7554         unallocated_encoding(s);
7555     }
7556     return true;
7557 }
7558
7559 static bool trans_MRS_v7m(DisasContext *s, arg_MRS_v7m *a)
7560 {
7561     TCGv_i32 tmp;
7562
7563     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
7564         return false;
7565     }
7566     tmp = tcg_const_i32(a->sysm);
7567     gen_helper_v7m_mrs(tmp, cpu_env, tmp);
7568     store_reg(s, a->rd, tmp);
7569     return true;
7570 }
7571
7572 static bool trans_MSR_v7m(DisasContext *s, arg_MSR_v7m *a)
7573 {
7574     TCGv_i32 addr, reg;
7575
7576     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
7577         return false;
7578     }
7579     addr = tcg_const_i32((a->mask << 10) | a->sysm);
7580     reg = load_reg(s, a->rn);
7581     gen_helper_v7m_msr(cpu_env, addr, reg);
7582     tcg_temp_free_i32(addr);
7583     tcg_temp_free_i32(reg);
7584     /* If we wrote to CONTROL, the EL might have changed */
7585     gen_helper_rebuild_hflags_m32_newel(cpu_env);
7586     gen_lookup_tb(s);
7587     return true;
7588 }
7589
7590 static bool trans_BX(DisasContext *s, arg_BX *a)
7591 {
7592     if (!ENABLE_ARCH_4T) {
7593         return false;
7594     }
7595     gen_bx_excret(s, load_reg(s, a->rm));
7596     return true;
7597 }
7598
7599 static bool trans_BXJ(DisasContext *s, arg_BXJ *a)
7600 {
7601     if (!ENABLE_ARCH_5J || arm_dc_feature(s, ARM_FEATURE_M)) {
7602         return false;
7603     }
7604     /* Trivial implementation equivalent to bx.  */
7605     gen_bx(s, load_reg(s, a->rm));
7606     return true;
7607 }
7608
7609 static bool trans_BLX_r(DisasContext *s, arg_BLX_r *a)
7610 {
7611     TCGv_i32 tmp;
7612
7613     if (!ENABLE_ARCH_5) {
7614         return false;
7615     }
7616     tmp = load_reg(s, a->rm);
7617     tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
7618     gen_bx(s, tmp);
7619     return true;
7620 }
7621
7622 /*
7623  * BXNS/BLXNS: only exist for v8M with the security extensions,
7624  * and always UNDEF if NonSecure.  We don't implement these in
7625  * the user-only mode either (in theory you can use them from
7626  * Secure User mode but they are too tied in to system emulation).
7627  */
7628 static bool trans_BXNS(DisasContext *s, arg_BXNS *a)
7629 {
7630     if (!s->v8m_secure || IS_USER_ONLY) {
7631         unallocated_encoding(s);
7632     } else {
7633         gen_bxns(s, a->rm);
7634     }
7635     return true;
7636 }
7637
7638 static bool trans_BLXNS(DisasContext *s, arg_BLXNS *a)
7639 {
7640     if (!s->v8m_secure || IS_USER_ONLY) {
7641         unallocated_encoding(s);
7642     } else {
7643         gen_blxns(s, a->rm);
7644     }
7645     return true;
7646 }
7647
7648 static bool trans_CLZ(DisasContext *s, arg_CLZ *a)
7649 {
7650     TCGv_i32 tmp;
7651
7652     if (!ENABLE_ARCH_5) {
7653         return false;
7654     }
7655     tmp = load_reg(s, a->rm);
7656     tcg_gen_clzi_i32(tmp, tmp, 32);
7657     store_reg(s, a->rd, tmp);
7658     return true;
7659 }
7660
7661 static bool trans_ERET(DisasContext *s, arg_ERET *a)
7662 {
7663     TCGv_i32 tmp;
7664
7665     if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
7666         return false;
7667     }
7668     if (IS_USER(s)) {
7669         unallocated_encoding(s);
7670         return true;
7671     }
7672     if (s->current_el == 2) {
7673         /* ERET from Hyp uses ELR_Hyp, not LR */
7674         tmp = load_cpu_field(elr_el[2]);
7675     } else {
7676         tmp = load_reg(s, 14);
7677     }
7678     gen_exception_return(s, tmp);
7679     return true;
7680 }
7681
7682 static bool trans_HLT(DisasContext *s, arg_HLT *a)
7683 {
7684     gen_hlt(s, a->imm);
7685     return true;
7686 }
7687
7688 static bool trans_BKPT(DisasContext *s, arg_BKPT *a)
7689 {
7690     if (!ENABLE_ARCH_5) {
7691         return false;
7692     }
7693     if (arm_dc_feature(s, ARM_FEATURE_M) &&
7694         semihosting_enabled() &&
7695 #ifndef CONFIG_USER_ONLY
7696         !IS_USER(s) &&
7697 #endif
7698         (a->imm == 0xab)) {
7699         gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
7700     } else {
7701         gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
7702     }
7703     return true;
7704 }
7705
7706 static bool trans_HVC(DisasContext *s, arg_HVC *a)
7707 {
7708     if (!ENABLE_ARCH_7 || arm_dc_feature(s, ARM_FEATURE_M)) {
7709         return false;
7710     }
7711     if (IS_USER(s)) {
7712         unallocated_encoding(s);
7713     } else {
7714         gen_hvc(s, a->imm);
7715     }
7716     return true;
7717 }
7718
7719 static bool trans_SMC(DisasContext *s, arg_SMC *a)
7720 {
7721     if (!ENABLE_ARCH_6K || arm_dc_feature(s, ARM_FEATURE_M)) {
7722         return false;
7723     }
7724     if (IS_USER(s)) {
7725         unallocated_encoding(s);
7726     } else {
7727         gen_smc(s);
7728     }
7729     return true;
7730 }
7731
7732 static bool trans_SG(DisasContext *s, arg_SG *a)
7733 {
7734     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
7735         !arm_dc_feature(s, ARM_FEATURE_V8)) {
7736         return false;
7737     }
7738     /*
7739      * SG (v8M only)
7740      * The bulk of the behaviour for this instruction is implemented
7741      * in v7m_handle_execute_nsc(), which deals with the insn when
7742      * it is executed by a CPU in non-secure state from memory
7743      * which is Secure & NonSecure-Callable.
7744      * Here we only need to handle the remaining cases:
7745      *  * in NS memory (including the "security extension not
7746      *    implemented" case) : NOP
7747      *  * in S memory but CPU already secure (clear IT bits)
7748      * We know that the attribute for the memory this insn is
7749      * in must match the current CPU state, because otherwise
7750      * get_phys_addr_pmsav8 would have generated an exception.
7751      */
7752     if (s->v8m_secure) {
7753         /* Like the IT insn, we don't need to generate any code */
7754         s->condexec_cond = 0;
7755         s->condexec_mask = 0;
7756     }
7757     return true;
7758 }
7759
7760 static bool trans_TT(DisasContext *s, arg_TT *a)
7761 {
7762     TCGv_i32 addr, tmp;
7763
7764     if (!arm_dc_feature(s, ARM_FEATURE_M) ||
7765         !arm_dc_feature(s, ARM_FEATURE_V8)) {
7766         return false;
7767     }
7768     if (a->rd == 13 || a->rd == 15 || a->rn == 15) {
7769         /* We UNDEF for these UNPREDICTABLE cases */
7770         unallocated_encoding(s);
7771         return true;
7772     }
7773     if (a->A && !s->v8m_secure) {
7774         /* This case is UNDEFINED.  */
7775         unallocated_encoding(s);
7776         return true;
7777     }
7778
7779     addr = load_reg(s, a->rn);
7780     tmp = tcg_const_i32((a->A << 1) | a->T);
7781     gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
7782     tcg_temp_free_i32(addr);
7783     store_reg(s, a->rd, tmp);
7784     return true;
7785 }
7786
7787 /*
7788  * Load/store register index
7789  */
7790
7791 static ISSInfo make_issinfo(DisasContext *s, int rd, bool p, bool w)
7792 {
7793     ISSInfo ret;
7794
7795     /* ISS not valid if writeback */
7796     if (p && !w) {
7797         ret = rd;
7798         if (s->base.pc_next - s->pc_curr == 2) {
7799             ret |= ISSIs16Bit;
7800         }
7801     } else {
7802         ret = ISSInvalid;
7803     }
7804     return ret;
7805 }
7806
7807 static TCGv_i32 op_addr_rr_pre(DisasContext *s, arg_ldst_rr *a)
7808 {
7809     TCGv_i32 addr = load_reg(s, a->rn);
7810
7811     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7812         gen_helper_v8m_stackcheck(cpu_env, addr);
7813     }
7814
7815     if (a->p) {
7816         TCGv_i32 ofs = load_reg(s, a->rm);
7817         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
7818         if (a->u) {
7819             tcg_gen_add_i32(addr, addr, ofs);
7820         } else {
7821             tcg_gen_sub_i32(addr, addr, ofs);
7822         }
7823         tcg_temp_free_i32(ofs);
7824     }
7825     return addr;
7826 }
7827
7828 static void op_addr_rr_post(DisasContext *s, arg_ldst_rr *a,
7829                             TCGv_i32 addr, int address_offset)
7830 {
7831     if (!a->p) {
7832         TCGv_i32 ofs = load_reg(s, a->rm);
7833         gen_arm_shift_im(ofs, a->shtype, a->shimm, 0);
7834         if (a->u) {
7835             tcg_gen_add_i32(addr, addr, ofs);
7836         } else {
7837             tcg_gen_sub_i32(addr, addr, ofs);
7838         }
7839         tcg_temp_free_i32(ofs);
7840     } else if (!a->w) {
7841         tcg_temp_free_i32(addr);
7842         return;
7843     }
7844     tcg_gen_addi_i32(addr, addr, address_offset);
7845     store_reg(s, a->rn, addr);
7846 }
7847
7848 static bool op_load_rr(DisasContext *s, arg_ldst_rr *a,
7849                        MemOp mop, int mem_idx)
7850 {
7851     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
7852     TCGv_i32 addr, tmp;
7853
7854     addr = op_addr_rr_pre(s, a);
7855
7856     tmp = tcg_temp_new_i32();
7857     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
7858     disas_set_da_iss(s, mop, issinfo);
7859
7860     /*
7861      * Perform base writeback before the loaded value to
7862      * ensure correct behavior with overlapping index registers.
7863      */
7864     op_addr_rr_post(s, a, addr, 0);
7865     store_reg_from_load(s, a->rt, tmp);
7866     return true;
7867 }
7868
7869 static bool op_store_rr(DisasContext *s, arg_ldst_rr *a,
7870                         MemOp mop, int mem_idx)
7871 {
7872     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
7873     TCGv_i32 addr, tmp;
7874
7875     addr = op_addr_rr_pre(s, a);
7876
7877     tmp = load_reg(s, a->rt);
7878     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
7879     disas_set_da_iss(s, mop, issinfo);
7880     tcg_temp_free_i32(tmp);
7881
7882     op_addr_rr_post(s, a, addr, 0);
7883     return true;
7884 }
7885
7886 static bool trans_LDRD_rr(DisasContext *s, arg_ldst_rr *a)
7887 {
7888     int mem_idx = get_mem_index(s);
7889     TCGv_i32 addr, tmp;
7890
7891     if (!ENABLE_ARCH_5TE) {
7892         return false;
7893     }
7894     if (a->rt & 1) {
7895         unallocated_encoding(s);
7896         return true;
7897     }
7898     addr = op_addr_rr_pre(s, a);
7899
7900     tmp = tcg_temp_new_i32();
7901     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7902     store_reg(s, a->rt, tmp);
7903
7904     tcg_gen_addi_i32(addr, addr, 4);
7905
7906     tmp = tcg_temp_new_i32();
7907     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7908     store_reg(s, a->rt + 1, tmp);
7909
7910     /* LDRD w/ base writeback is undefined if the registers overlap.  */
7911     op_addr_rr_post(s, a, addr, -4);
7912     return true;
7913 }
7914
7915 static bool trans_STRD_rr(DisasContext *s, arg_ldst_rr *a)
7916 {
7917     int mem_idx = get_mem_index(s);
7918     TCGv_i32 addr, tmp;
7919
7920     if (!ENABLE_ARCH_5TE) {
7921         return false;
7922     }
7923     if (a->rt & 1) {
7924         unallocated_encoding(s);
7925         return true;
7926     }
7927     addr = op_addr_rr_pre(s, a);
7928
7929     tmp = load_reg(s, a->rt);
7930     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7931     tcg_temp_free_i32(tmp);
7932
7933     tcg_gen_addi_i32(addr, addr, 4);
7934
7935     tmp = load_reg(s, a->rt + 1);
7936     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
7937     tcg_temp_free_i32(tmp);
7938
7939     op_addr_rr_post(s, a, addr, -4);
7940     return true;
7941 }
7942
7943 /*
7944  * Load/store immediate index
7945  */
7946
7947 static TCGv_i32 op_addr_ri_pre(DisasContext *s, arg_ldst_ri *a)
7948 {
7949     int ofs = a->imm;
7950
7951     if (!a->u) {
7952         ofs = -ofs;
7953     }
7954
7955     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
7956         /*
7957          * Stackcheck. Here we know 'addr' is the current SP;
7958          * U is set if we're moving SP up, else down. It is
7959          * UNKNOWN whether the limit check triggers when SP starts
7960          * below the limit and ends up above it; we chose to do so.
7961          */
7962         if (!a->u) {
7963             TCGv_i32 newsp = tcg_temp_new_i32();
7964             tcg_gen_addi_i32(newsp, cpu_R[13], ofs);
7965             gen_helper_v8m_stackcheck(cpu_env, newsp);
7966             tcg_temp_free_i32(newsp);
7967         } else {
7968             gen_helper_v8m_stackcheck(cpu_env, cpu_R[13]);
7969         }
7970     }
7971
7972     return add_reg_for_lit(s, a->rn, a->p ? ofs : 0);
7973 }
7974
7975 static void op_addr_ri_post(DisasContext *s, arg_ldst_ri *a,
7976                             TCGv_i32 addr, int address_offset)
7977 {
7978     if (!a->p) {
7979         if (a->u) {
7980             address_offset += a->imm;
7981         } else {
7982             address_offset -= a->imm;
7983         }
7984     } else if (!a->w) {
7985         tcg_temp_free_i32(addr);
7986         return;
7987     }
7988     tcg_gen_addi_i32(addr, addr, address_offset);
7989     store_reg(s, a->rn, addr);
7990 }
7991
7992 static bool op_load_ri(DisasContext *s, arg_ldst_ri *a,
7993                        MemOp mop, int mem_idx)
7994 {
7995     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
7996     TCGv_i32 addr, tmp;
7997
7998     addr = op_addr_ri_pre(s, a);
7999
8000     tmp = tcg_temp_new_i32();
8001     gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8002     disas_set_da_iss(s, mop, issinfo);
8003
8004     /*
8005      * Perform base writeback before the loaded value to
8006      * ensure correct behavior with overlapping index registers.
8007      */
8008     op_addr_ri_post(s, a, addr, 0);
8009     store_reg_from_load(s, a->rt, tmp);
8010     return true;
8011 }
8012
8013 static bool op_store_ri(DisasContext *s, arg_ldst_ri *a,
8014                         MemOp mop, int mem_idx)
8015 {
8016     ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) | ISSIsWrite;
8017     TCGv_i32 addr, tmp;
8018
8019     addr = op_addr_ri_pre(s, a);
8020
8021     tmp = load_reg(s, a->rt);
8022     gen_aa32_st_i32(s, tmp, addr, mem_idx, mop | s->be_data);
8023     disas_set_da_iss(s, mop, issinfo);
8024     tcg_temp_free_i32(tmp);
8025
8026     op_addr_ri_post(s, a, addr, 0);
8027     return true;
8028 }
8029
8030 static bool op_ldrd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8031 {
8032     int mem_idx = get_mem_index(s);
8033     TCGv_i32 addr, tmp;
8034
8035     addr = op_addr_ri_pre(s, a);
8036
8037     tmp = tcg_temp_new_i32();
8038     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8039     store_reg(s, a->rt, tmp);
8040
8041     tcg_gen_addi_i32(addr, addr, 4);
8042
8043     tmp = tcg_temp_new_i32();
8044     gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8045     store_reg(s, rt2, tmp);
8046
8047     /* LDRD w/ base writeback is undefined if the registers overlap.  */
8048     op_addr_ri_post(s, a, addr, -4);
8049     return true;
8050 }
8051
8052 static bool trans_LDRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8053 {
8054     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8055         return false;
8056     }
8057     return op_ldrd_ri(s, a, a->rt + 1);
8058 }
8059
8060 static bool trans_LDRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8061 {
8062     arg_ldst_ri b = {
8063         .u = a->u, .w = a->w, .p = a->p,
8064         .rn = a->rn, .rt = a->rt, .imm = a->imm
8065     };
8066     return op_ldrd_ri(s, &b, a->rt2);
8067 }
8068
8069 static bool op_strd_ri(DisasContext *s, arg_ldst_ri *a, int rt2)
8070 {
8071     int mem_idx = get_mem_index(s);
8072     TCGv_i32 addr, tmp;
8073
8074     addr = op_addr_ri_pre(s, a);
8075
8076     tmp = load_reg(s, a->rt);
8077     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8078     tcg_temp_free_i32(tmp);
8079
8080     tcg_gen_addi_i32(addr, addr, 4);
8081
8082     tmp = load_reg(s, rt2);
8083     gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL | s->be_data);
8084     tcg_temp_free_i32(tmp);
8085
8086     op_addr_ri_post(s, a, addr, -4);
8087     return true;
8088 }
8089
8090 static bool trans_STRD_ri_a32(DisasContext *s, arg_ldst_ri *a)
8091 {
8092     if (!ENABLE_ARCH_5TE || (a->rt & 1)) {
8093         return false;
8094     }
8095     return op_strd_ri(s, a, a->rt + 1);
8096 }
8097
8098 static bool trans_STRD_ri_t32(DisasContext *s, arg_ldst_ri2 *a)
8099 {
8100     arg_ldst_ri b = {
8101         .u = a->u, .w = a->w, .p = a->p,
8102         .rn = a->rn, .rt = a->rt, .imm = a->imm
8103     };
8104     return op_strd_ri(s, &b, a->rt2);
8105 }
8106
8107 #define DO_LDST(NAME, WHICH, MEMOP) \
8108 static bool trans_##NAME##_ri(DisasContext *s, arg_ldst_ri *a)        \
8109 {                                                                     \
8110     return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s));            \
8111 }                                                                     \
8112 static bool trans_##NAME##T_ri(DisasContext *s, arg_ldst_ri *a)       \
8113 {                                                                     \
8114     return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s));   \
8115 }                                                                     \
8116 static bool trans_##NAME##_rr(DisasContext *s, arg_ldst_rr *a)        \
8117 {                                                                     \
8118     return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s));            \
8119 }                                                                     \
8120 static bool trans_##NAME##T_rr(DisasContext *s, arg_ldst_rr *a)       \
8121 {                                                                     \
8122     return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s));   \
8123 }
8124
8125 DO_LDST(LDR, load, MO_UL)
8126 DO_LDST(LDRB, load, MO_UB)
8127 DO_LDST(LDRH, load, MO_UW)
8128 DO_LDST(LDRSB, load, MO_SB)
8129 DO_LDST(LDRSH, load, MO_SW)
8130
8131 DO_LDST(STR, store, MO_UL)
8132 DO_LDST(STRB, store, MO_UB)
8133 DO_LDST(STRH, store, MO_UW)
8134
8135 #undef DO_LDST
8136
8137 /*
8138  * Synchronization primitives
8139  */
8140
8141 static bool op_swp(DisasContext *s, arg_SWP *a, MemOp opc)
8142 {
8143     TCGv_i32 addr, tmp;
8144     TCGv taddr;
8145
8146     opc |= s->be_data;
8147     addr = load_reg(s, a->rn);
8148     taddr = gen_aa32_addr(s, addr, opc);
8149     tcg_temp_free_i32(addr);
8150
8151     tmp = load_reg(s, a->rt2);
8152     tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
8153     tcg_temp_free(taddr);
8154
8155     store_reg(s, a->rt, tmp);
8156     return true;
8157 }
8158
8159 static bool trans_SWP(DisasContext *s, arg_SWP *a)
8160 {
8161     return op_swp(s, a, MO_UL | MO_ALIGN);
8162 }
8163
8164 static bool trans_SWPB(DisasContext *s, arg_SWP *a)
8165 {
8166     return op_swp(s, a, MO_UB);
8167 }
8168
8169 /*
8170  * Load/Store Exclusive and Load-Acquire/Store-Release
8171  */
8172
8173 static bool op_strex(DisasContext *s, arg_STREX *a, MemOp mop, bool rel)
8174 {
8175     TCGv_i32 addr;
8176     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8177     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8178
8179     /* We UNDEF for these UNPREDICTABLE cases.  */
8180     if (a->rd == 15 || a->rn == 15 || a->rt == 15
8181         || a->rd == a->rn || a->rd == a->rt
8182         || (!v8a && s->thumb && (a->rd == 13 || a->rt == 13))
8183         || (mop == MO_64
8184             && (a->rt2 == 15
8185                 || a->rd == a->rt2
8186                 || (!v8a && s->thumb && a->rt2 == 13)))) {
8187         unallocated_encoding(s);
8188         return true;
8189     }
8190
8191     if (rel) {
8192         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8193     }
8194
8195     addr = tcg_temp_local_new_i32();
8196     load_reg_var(s, addr, a->rn);
8197     tcg_gen_addi_i32(addr, addr, a->imm);
8198
8199     gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
8200     tcg_temp_free_i32(addr);
8201     return true;
8202 }
8203
8204 static bool trans_STREX(DisasContext *s, arg_STREX *a)
8205 {
8206     if (!ENABLE_ARCH_6) {
8207         return false;
8208     }
8209     return op_strex(s, a, MO_32, false);
8210 }
8211
8212 static bool trans_STREXD_a32(DisasContext *s, arg_STREX *a)
8213 {
8214     if (!ENABLE_ARCH_6K) {
8215         return false;
8216     }
8217     /* We UNDEF for these UNPREDICTABLE cases.  */
8218     if (a->rt & 1) {
8219         unallocated_encoding(s);
8220         return true;
8221     }
8222     a->rt2 = a->rt + 1;
8223     return op_strex(s, a, MO_64, false);
8224 }
8225
8226 static bool trans_STREXD_t32(DisasContext *s, arg_STREX *a)
8227 {
8228     return op_strex(s, a, MO_64, false);
8229 }
8230
8231 static bool trans_STREXB(DisasContext *s, arg_STREX *a)
8232 {
8233     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8234         return false;
8235     }
8236     return op_strex(s, a, MO_8, false);
8237 }
8238
8239 static bool trans_STREXH(DisasContext *s, arg_STREX *a)
8240 {
8241     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8242         return false;
8243     }
8244     return op_strex(s, a, MO_16, false);
8245 }
8246
8247 static bool trans_STLEX(DisasContext *s, arg_STREX *a)
8248 {
8249     if (!ENABLE_ARCH_8) {
8250         return false;
8251     }
8252     return op_strex(s, a, MO_32, true);
8253 }
8254
8255 static bool trans_STLEXD_a32(DisasContext *s, arg_STREX *a)
8256 {
8257     if (!ENABLE_ARCH_8) {
8258         return false;
8259     }
8260     /* We UNDEF for these UNPREDICTABLE cases.  */
8261     if (a->rt & 1) {
8262         unallocated_encoding(s);
8263         return true;
8264     }
8265     a->rt2 = a->rt + 1;
8266     return op_strex(s, a, MO_64, true);
8267 }
8268
8269 static bool trans_STLEXD_t32(DisasContext *s, arg_STREX *a)
8270 {
8271     if (!ENABLE_ARCH_8) {
8272         return false;
8273     }
8274     return op_strex(s, a, MO_64, true);
8275 }
8276
8277 static bool trans_STLEXB(DisasContext *s, arg_STREX *a)
8278 {
8279     if (!ENABLE_ARCH_8) {
8280         return false;
8281     }
8282     return op_strex(s, a, MO_8, true);
8283 }
8284
8285 static bool trans_STLEXH(DisasContext *s, arg_STREX *a)
8286 {
8287     if (!ENABLE_ARCH_8) {
8288         return false;
8289     }
8290     return op_strex(s, a, MO_16, true);
8291 }
8292
8293 static bool op_stl(DisasContext *s, arg_STL *a, MemOp mop)
8294 {
8295     TCGv_i32 addr, tmp;
8296
8297     if (!ENABLE_ARCH_8) {
8298         return false;
8299     }
8300     /* We UNDEF for these UNPREDICTABLE cases.  */
8301     if (a->rn == 15 || a->rt == 15) {
8302         unallocated_encoding(s);
8303         return true;
8304     }
8305
8306     addr = load_reg(s, a->rn);
8307     tmp = load_reg(s, a->rt);
8308     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8309     gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
8310     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel | ISSIsWrite);
8311
8312     tcg_temp_free_i32(tmp);
8313     tcg_temp_free_i32(addr);
8314     return true;
8315 }
8316
8317 static bool trans_STL(DisasContext *s, arg_STL *a)
8318 {
8319     return op_stl(s, a, MO_UL);
8320 }
8321
8322 static bool trans_STLB(DisasContext *s, arg_STL *a)
8323 {
8324     return op_stl(s, a, MO_UB);
8325 }
8326
8327 static bool trans_STLH(DisasContext *s, arg_STL *a)
8328 {
8329     return op_stl(s, a, MO_UW);
8330 }
8331
8332 static bool op_ldrex(DisasContext *s, arg_LDREX *a, MemOp mop, bool acq)
8333 {
8334     TCGv_i32 addr;
8335     /* Some cases stopped being UNPREDICTABLE in v8A (but not v8M) */
8336     bool v8a = ENABLE_ARCH_8 && !arm_dc_feature(s, ARM_FEATURE_M);
8337
8338     /* We UNDEF for these UNPREDICTABLE cases.  */
8339     if (a->rn == 15 || a->rt == 15
8340         || (!v8a && s->thumb && a->rt == 13)
8341         || (mop == MO_64
8342             && (a->rt2 == 15 || a->rt == a->rt2
8343                 || (!v8a && s->thumb && a->rt2 == 13)))) {
8344         unallocated_encoding(s);
8345         return true;
8346     }
8347
8348     addr = tcg_temp_local_new_i32();
8349     load_reg_var(s, addr, a->rn);
8350     tcg_gen_addi_i32(addr, addr, a->imm);
8351
8352     gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
8353     tcg_temp_free_i32(addr);
8354
8355     if (acq) {
8356         tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
8357     }
8358     return true;
8359 }
8360
8361 static bool trans_LDREX(DisasContext *s, arg_LDREX *a)
8362 {
8363     if (!ENABLE_ARCH_6) {
8364         return false;
8365     }
8366     return op_ldrex(s, a, MO_32, false);
8367 }
8368
8369 static bool trans_LDREXD_a32(DisasContext *s, arg_LDREX *a)
8370 {
8371     if (!ENABLE_ARCH_6K) {
8372         return false;
8373     }
8374     /* We UNDEF for these UNPREDICTABLE cases.  */
8375     if (a->rt & 1) {
8376         unallocated_encoding(s);
8377         return true;
8378     }
8379     a->rt2 = a->rt + 1;
8380     return op_ldrex(s, a, MO_64, false);
8381 }
8382
8383 static bool trans_LDREXD_t32(DisasContext *s, arg_LDREX *a)
8384 {
8385     return op_ldrex(s, a, MO_64, false);
8386 }
8387
8388 static bool trans_LDREXB(DisasContext *s, arg_LDREX *a)
8389 {
8390     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8391         return false;
8392     }
8393     return op_ldrex(s, a, MO_8, false);
8394 }
8395
8396 static bool trans_LDREXH(DisasContext *s, arg_LDREX *a)
8397 {
8398     if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8399         return false;
8400     }
8401     return op_ldrex(s, a, MO_16, false);
8402 }
8403
8404 static bool trans_LDAEX(DisasContext *s, arg_LDREX *a)
8405 {
8406     if (!ENABLE_ARCH_8) {
8407         return false;
8408     }
8409     return op_ldrex(s, a, MO_32, true);
8410 }
8411
8412 static bool trans_LDAEXD_a32(DisasContext *s, arg_LDREX *a)
8413 {
8414     if (!ENABLE_ARCH_8) {
8415         return false;
8416     }
8417     /* We UNDEF for these UNPREDICTABLE cases.  */
8418     if (a->rt & 1) {
8419         unallocated_encoding(s);
8420         return true;
8421     }
8422     a->rt2 = a->rt + 1;
8423     return op_ldrex(s, a, MO_64, true);
8424 }
8425
8426 static bool trans_LDAEXD_t32(DisasContext *s, arg_LDREX *a)
8427 {
8428     if (!ENABLE_ARCH_8) {
8429         return false;
8430     }
8431     return op_ldrex(s, a, MO_64, true);
8432 }
8433
8434 static bool trans_LDAEXB(DisasContext *s, arg_LDREX *a)
8435 {
8436     if (!ENABLE_ARCH_8) {
8437         return false;
8438     }
8439     return op_ldrex(s, a, MO_8, true);
8440 }
8441
8442 static bool trans_LDAEXH(DisasContext *s, arg_LDREX *a)
8443 {
8444     if (!ENABLE_ARCH_8) {
8445         return false;
8446     }
8447     return op_ldrex(s, a, MO_16, true);
8448 }
8449
8450 static bool op_lda(DisasContext *s, arg_LDA *a, MemOp mop)
8451 {
8452     TCGv_i32 addr, tmp;
8453
8454     if (!ENABLE_ARCH_8) {
8455         return false;
8456     }
8457     /* We UNDEF for these UNPREDICTABLE cases.  */
8458     if (a->rn == 15 || a->rt == 15) {
8459         unallocated_encoding(s);
8460         return true;
8461     }
8462
8463     addr = load_reg(s, a->rn);
8464     tmp = tcg_temp_new_i32();
8465     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop | s->be_data);
8466     disas_set_da_iss(s, mop, a->rt | ISSIsAcqRel);
8467     tcg_temp_free_i32(addr);
8468
8469     store_reg(s, a->rt, tmp);
8470     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
8471     return true;
8472 }
8473
8474 static bool trans_LDA(DisasContext *s, arg_LDA *a)
8475 {
8476     return op_lda(s, a, MO_UL);
8477 }
8478
8479 static bool trans_LDAB(DisasContext *s, arg_LDA *a)
8480 {
8481     return op_lda(s, a, MO_UB);
8482 }
8483
8484 static bool trans_LDAH(DisasContext *s, arg_LDA *a)
8485 {
8486     return op_lda(s, a, MO_UW);
8487 }
8488
8489 /*
8490  * Media instructions
8491  */
8492
8493 static bool trans_USADA8(DisasContext *s, arg_USADA8 *a)
8494 {
8495     TCGv_i32 t1, t2;
8496
8497     if (!ENABLE_ARCH_6) {
8498         return false;
8499     }
8500
8501     t1 = load_reg(s, a->rn);
8502     t2 = load_reg(s, a->rm);
8503     gen_helper_usad8(t1, t1, t2);
8504     tcg_temp_free_i32(t2);
8505     if (a->ra != 15) {
8506         t2 = load_reg(s, a->ra);
8507         tcg_gen_add_i32(t1, t1, t2);
8508         tcg_temp_free_i32(t2);
8509     }
8510     store_reg(s, a->rd, t1);
8511     return true;
8512 }
8513
8514 static bool op_bfx(DisasContext *s, arg_UBFX *a, bool u)
8515 {
8516     TCGv_i32 tmp;
8517     int width = a->widthm1 + 1;
8518     int shift = a->lsb;
8519
8520     if (!ENABLE_ARCH_6T2) {
8521         return false;
8522     }
8523     if (shift + width > 32) {
8524         /* UNPREDICTABLE; we choose to UNDEF */
8525         unallocated_encoding(s);
8526         return true;
8527     }
8528
8529     tmp = load_reg(s, a->rn);
8530     if (u) {
8531         tcg_gen_extract_i32(tmp, tmp, shift, width);
8532     } else {
8533         tcg_gen_sextract_i32(tmp, tmp, shift, width);
8534     }
8535     store_reg(s, a->rd, tmp);
8536     return true;
8537 }
8538
8539 static bool trans_SBFX(DisasContext *s, arg_SBFX *a)
8540 {
8541     return op_bfx(s, a, false);
8542 }
8543
8544 static bool trans_UBFX(DisasContext *s, arg_UBFX *a)
8545 {
8546     return op_bfx(s, a, true);
8547 }
8548
8549 static bool trans_BFCI(DisasContext *s, arg_BFCI *a)
8550 {
8551     TCGv_i32 tmp;
8552     int msb = a->msb, lsb = a->lsb;
8553     int width;
8554
8555     if (!ENABLE_ARCH_6T2) {
8556         return false;
8557     }
8558     if (msb < lsb) {
8559         /* UNPREDICTABLE; we choose to UNDEF */
8560         unallocated_encoding(s);
8561         return true;
8562     }
8563
8564     width = msb + 1 - lsb;
8565     if (a->rn == 15) {
8566         /* BFC */
8567         tmp = tcg_const_i32(0);
8568     } else {
8569         /* BFI */
8570         tmp = load_reg(s, a->rn);
8571     }
8572     if (width != 32) {
8573         TCGv_i32 tmp2 = load_reg(s, a->rd);
8574         tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
8575         tcg_temp_free_i32(tmp2);
8576     }
8577     store_reg(s, a->rd, tmp);
8578     return true;
8579 }
8580
8581 static bool trans_UDF(DisasContext *s, arg_UDF *a)
8582 {
8583     unallocated_encoding(s);
8584     return true;
8585 }
8586
8587 /*
8588  * Parallel addition and subtraction
8589  */
8590
8591 static bool op_par_addsub(DisasContext *s, arg_rrr *a,
8592                           void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
8593 {
8594     TCGv_i32 t0, t1;
8595
8596     if (s->thumb
8597         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8598         : !ENABLE_ARCH_6) {
8599         return false;
8600     }
8601
8602     t0 = load_reg(s, a->rn);
8603     t1 = load_reg(s, a->rm);
8604
8605     gen(t0, t0, t1);
8606
8607     tcg_temp_free_i32(t1);
8608     store_reg(s, a->rd, t0);
8609     return true;
8610 }
8611
8612 static bool op_par_addsub_ge(DisasContext *s, arg_rrr *a,
8613                              void (*gen)(TCGv_i32, TCGv_i32,
8614                                          TCGv_i32, TCGv_ptr))
8615 {
8616     TCGv_i32 t0, t1;
8617     TCGv_ptr ge;
8618
8619     if (s->thumb
8620         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8621         : !ENABLE_ARCH_6) {
8622         return false;
8623     }
8624
8625     t0 = load_reg(s, a->rn);
8626     t1 = load_reg(s, a->rm);
8627
8628     ge = tcg_temp_new_ptr();
8629     tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
8630     gen(t0, t0, t1, ge);
8631
8632     tcg_temp_free_ptr(ge);
8633     tcg_temp_free_i32(t1);
8634     store_reg(s, a->rd, t0);
8635     return true;
8636 }
8637
8638 #define DO_PAR_ADDSUB(NAME, helper) \
8639 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
8640 {                                                       \
8641     return op_par_addsub(s, a, helper);                 \
8642 }
8643
8644 #define DO_PAR_ADDSUB_GE(NAME, helper) \
8645 static bool trans_##NAME(DisasContext *s, arg_rrr *a)   \
8646 {                                                       \
8647     return op_par_addsub_ge(s, a, helper);              \
8648 }
8649
8650 DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
8651 DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
8652 DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
8653 DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
8654 DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
8655 DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
8656
8657 DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
8658 DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
8659 DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
8660 DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
8661 DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
8662 DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
8663
8664 DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
8665 DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
8666 DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
8667 DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
8668 DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
8669 DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
8670
8671 DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
8672 DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
8673 DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
8674 DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
8675 DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
8676 DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
8677
8678 DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
8679 DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
8680 DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
8681 DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
8682 DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
8683 DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
8684
8685 DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
8686 DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
8687 DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
8688 DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
8689 DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
8690 DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
8691
8692 #undef DO_PAR_ADDSUB
8693 #undef DO_PAR_ADDSUB_GE
8694
8695 /*
8696  * Packing, unpacking, saturation, and reversal
8697  */
8698
8699 static bool trans_PKH(DisasContext *s, arg_PKH *a)
8700 {
8701     TCGv_i32 tn, tm;
8702     int shift = a->imm;
8703
8704     if (s->thumb
8705         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8706         : !ENABLE_ARCH_6) {
8707         return false;
8708     }
8709
8710     tn = load_reg(s, a->rn);
8711     tm = load_reg(s, a->rm);
8712     if (a->tb) {
8713         /* PKHTB */
8714         if (shift == 0) {
8715             shift = 31;
8716         }
8717         tcg_gen_sari_i32(tm, tm, shift);
8718         tcg_gen_deposit_i32(tn, tn, tm, 0, 16);
8719     } else {
8720         /* PKHBT */
8721         tcg_gen_shli_i32(tm, tm, shift);
8722         tcg_gen_deposit_i32(tn, tm, tn, 0, 16);
8723     }
8724     tcg_temp_free_i32(tm);
8725     store_reg(s, a->rd, tn);
8726     return true;
8727 }
8728
8729 static bool op_sat(DisasContext *s, arg_sat *a,
8730                    void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
8731 {
8732     TCGv_i32 tmp, satimm;
8733     int shift = a->imm;
8734
8735     if (!ENABLE_ARCH_6) {
8736         return false;
8737     }
8738
8739     tmp = load_reg(s, a->rn);
8740     if (a->sh) {
8741         tcg_gen_sari_i32(tmp, tmp, shift ? shift : 31);
8742     } else {
8743         tcg_gen_shli_i32(tmp, tmp, shift);
8744     }
8745
8746     satimm = tcg_const_i32(a->satimm);
8747     gen(tmp, cpu_env, tmp, satimm);
8748     tcg_temp_free_i32(satimm);
8749
8750     store_reg(s, a->rd, tmp);
8751     return true;
8752 }
8753
8754 static bool trans_SSAT(DisasContext *s, arg_sat *a)
8755 {
8756     return op_sat(s, a, gen_helper_ssat);
8757 }
8758
8759 static bool trans_USAT(DisasContext *s, arg_sat *a)
8760 {
8761     return op_sat(s, a, gen_helper_usat);
8762 }
8763
8764 static bool trans_SSAT16(DisasContext *s, arg_sat *a)
8765 {
8766     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8767         return false;
8768     }
8769     return op_sat(s, a, gen_helper_ssat16);
8770 }
8771
8772 static bool trans_USAT16(DisasContext *s, arg_sat *a)
8773 {
8774     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8775         return false;
8776     }
8777     return op_sat(s, a, gen_helper_usat16);
8778 }
8779
8780 static bool op_xta(DisasContext *s, arg_rrr_rot *a,
8781                    void (*gen_extract)(TCGv_i32, TCGv_i32),
8782                    void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
8783 {
8784     TCGv_i32 tmp;
8785
8786     if (!ENABLE_ARCH_6) {
8787         return false;
8788     }
8789
8790     tmp = load_reg(s, a->rm);
8791     /*
8792      * TODO: In many cases we could do a shift instead of a rotate.
8793      * Combined with a simple extend, that becomes an extract.
8794      */
8795     tcg_gen_rotri_i32(tmp, tmp, a->rot * 8);
8796     gen_extract(tmp, tmp);
8797
8798     if (a->rn != 15) {
8799         TCGv_i32 tmp2 = load_reg(s, a->rn);
8800         gen_add(tmp, tmp, tmp2);
8801         tcg_temp_free_i32(tmp2);
8802     }
8803     store_reg(s, a->rd, tmp);
8804     return true;
8805 }
8806
8807 static bool trans_SXTAB(DisasContext *s, arg_rrr_rot *a)
8808 {
8809     return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
8810 }
8811
8812 static bool trans_SXTAH(DisasContext *s, arg_rrr_rot *a)
8813 {
8814     return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
8815 }
8816
8817 static bool trans_SXTAB16(DisasContext *s, arg_rrr_rot *a)
8818 {
8819     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8820         return false;
8821     }
8822     return op_xta(s, a, gen_helper_sxtb16, gen_add16);
8823 }
8824
8825 static bool trans_UXTAB(DisasContext *s, arg_rrr_rot *a)
8826 {
8827     return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
8828 }
8829
8830 static bool trans_UXTAH(DisasContext *s, arg_rrr_rot *a)
8831 {
8832     return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
8833 }
8834
8835 static bool trans_UXTAB16(DisasContext *s, arg_rrr_rot *a)
8836 {
8837     if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
8838         return false;
8839     }
8840     return op_xta(s, a, gen_helper_uxtb16, gen_add16);
8841 }
8842
8843 static bool trans_SEL(DisasContext *s, arg_rrr *a)
8844 {
8845     TCGv_i32 t1, t2, t3;
8846
8847     if (s->thumb
8848         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8849         : !ENABLE_ARCH_6) {
8850         return false;
8851     }
8852
8853     t1 = load_reg(s, a->rn);
8854     t2 = load_reg(s, a->rm);
8855     t3 = tcg_temp_new_i32();
8856     tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
8857     gen_helper_sel_flags(t1, t3, t1, t2);
8858     tcg_temp_free_i32(t3);
8859     tcg_temp_free_i32(t2);
8860     store_reg(s, a->rd, t1);
8861     return true;
8862 }
8863
8864 static bool op_rr(DisasContext *s, arg_rr *a,
8865                   void (*gen)(TCGv_i32, TCGv_i32))
8866 {
8867     TCGv_i32 tmp;
8868
8869     tmp = load_reg(s, a->rm);
8870     gen(tmp, tmp);
8871     store_reg(s, a->rd, tmp);
8872     return true;
8873 }
8874
8875 static bool trans_REV(DisasContext *s, arg_rr *a)
8876 {
8877     if (!ENABLE_ARCH_6) {
8878         return false;
8879     }
8880     return op_rr(s, a, tcg_gen_bswap32_i32);
8881 }
8882
8883 static bool trans_REV16(DisasContext *s, arg_rr *a)
8884 {
8885     if (!ENABLE_ARCH_6) {
8886         return false;
8887     }
8888     return op_rr(s, a, gen_rev16);
8889 }
8890
8891 static bool trans_REVSH(DisasContext *s, arg_rr *a)
8892 {
8893     if (!ENABLE_ARCH_6) {
8894         return false;
8895     }
8896     return op_rr(s, a, gen_revsh);
8897 }
8898
8899 static bool trans_RBIT(DisasContext *s, arg_rr *a)
8900 {
8901     if (!ENABLE_ARCH_6T2) {
8902         return false;
8903     }
8904     return op_rr(s, a, gen_helper_rbit);
8905 }
8906
8907 /*
8908  * Signed multiply, signed and unsigned divide
8909  */
8910
8911 static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
8912 {
8913     TCGv_i32 t1, t2;
8914
8915     if (!ENABLE_ARCH_6) {
8916         return false;
8917     }
8918
8919     t1 = load_reg(s, a->rn);
8920     t2 = load_reg(s, a->rm);
8921     if (m_swap) {
8922         gen_swap_half(t2);
8923     }
8924     gen_smul_dual(t1, t2);
8925
8926     if (sub) {
8927         /* This subtraction cannot overflow. */
8928         tcg_gen_sub_i32(t1, t1, t2);
8929     } else {
8930         /*
8931          * This addition cannot overflow 32 bits; however it may
8932          * overflow considered as a signed operation, in which case
8933          * we must set the Q flag.
8934          */
8935         gen_helper_add_setq(t1, cpu_env, t1, t2);
8936     }
8937     tcg_temp_free_i32(t2);
8938
8939     if (a->ra != 15) {
8940         t2 = load_reg(s, a->ra);
8941         gen_helper_add_setq(t1, cpu_env, t1, t2);
8942         tcg_temp_free_i32(t2);
8943     }
8944     store_reg(s, a->rd, t1);
8945     return true;
8946 }
8947
8948 static bool trans_SMLAD(DisasContext *s, arg_rrrr *a)
8949 {
8950     return op_smlad(s, a, false, false);
8951 }
8952
8953 static bool trans_SMLADX(DisasContext *s, arg_rrrr *a)
8954 {
8955     return op_smlad(s, a, true, false);
8956 }
8957
8958 static bool trans_SMLSD(DisasContext *s, arg_rrrr *a)
8959 {
8960     return op_smlad(s, a, false, true);
8961 }
8962
8963 static bool trans_SMLSDX(DisasContext *s, arg_rrrr *a)
8964 {
8965     return op_smlad(s, a, true, true);
8966 }
8967
8968 static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub)
8969 {
8970     TCGv_i32 t1, t2;
8971     TCGv_i64 l1, l2;
8972
8973     if (!ENABLE_ARCH_6) {
8974         return false;
8975     }
8976
8977     t1 = load_reg(s, a->rn);
8978     t2 = load_reg(s, a->rm);
8979     if (m_swap) {
8980         gen_swap_half(t2);
8981     }
8982     gen_smul_dual(t1, t2);
8983
8984     l1 = tcg_temp_new_i64();
8985     l2 = tcg_temp_new_i64();
8986     tcg_gen_ext_i32_i64(l1, t1);
8987     tcg_gen_ext_i32_i64(l2, t2);
8988     tcg_temp_free_i32(t1);
8989     tcg_temp_free_i32(t2);
8990
8991     if (sub) {
8992         tcg_gen_sub_i64(l1, l1, l2);
8993     } else {
8994         tcg_gen_add_i64(l1, l1, l2);
8995     }
8996     tcg_temp_free_i64(l2);
8997
8998     gen_addq(s, l1, a->ra, a->rd);
8999     gen_storeq_reg(s, a->ra, a->rd, l1);
9000     tcg_temp_free_i64(l1);
9001     return true;
9002 }
9003
9004 static bool trans_SMLALD(DisasContext *s, arg_rrrr *a)
9005 {
9006     return op_smlald(s, a, false, false);
9007 }
9008
9009 static bool trans_SMLALDX(DisasContext *s, arg_rrrr *a)
9010 {
9011     return op_smlald(s, a, true, false);
9012 }
9013
9014 static bool trans_SMLSLD(DisasContext *s, arg_rrrr *a)
9015 {
9016     return op_smlald(s, a, false, true);
9017 }
9018
9019 static bool trans_SMLSLDX(DisasContext *s, arg_rrrr *a)
9020 {
9021     return op_smlald(s, a, true, true);
9022 }
9023
9024 static bool op_smmla(DisasContext *s, arg_rrrr *a, bool round, bool sub)
9025 {
9026     TCGv_i32 t1, t2;
9027
9028     if (s->thumb
9029         ? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9030         : !ENABLE_ARCH_6) {
9031         return false;
9032     }
9033
9034     t1 = load_reg(s, a->rn);
9035     t2 = load_reg(s, a->rm);
9036     tcg_gen_muls2_i32(t2, t1, t1, t2);
9037
9038     if (a->ra != 15) {
9039         TCGv_i32 t3 = load_reg(s, a->ra);
9040         if (sub) {
9041             /*
9042              * For SMMLS, we need a 64-bit subtract.  Borrow caused by
9043              * a non-zero multiplicand lowpart, and the correct result
9044              * lowpart for rounding.
9045              */
9046             TCGv_i32 zero = tcg_const_i32(0);
9047             tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9048             tcg_temp_free_i32(zero);
9049         } else {
9050             tcg_gen_add_i32(t1, t1, t3);
9051         }
9052         tcg_temp_free_i32(t3);
9053     }
9054     if (round) {
9055         /*
9056          * Adding 0x80000000 to the 64-bit quantity means that we have
9057          * carry in to the high word when the low word has the msb set.
9058          */
9059         tcg_gen_shri_i32(t2, t2, 31);
9060         tcg_gen_add_i32(t1, t1, t2);
9061     }
9062     tcg_temp_free_i32(t2);
9063     store_reg(s, a->rd, t1);
9064     return true;
9065 }
9066
9067 static bool trans_SMMLA(DisasContext *s, arg_rrrr *a)
9068 {
9069     return op_smmla(s, a, false, false);
9070 }
9071
9072 static bool trans_SMMLAR(DisasContext *s, arg_rrrr *a)
9073 {
9074     return op_smmla(s, a, true, false);
9075 }
9076
9077 static bool trans_SMMLS(DisasContext *s, arg_rrrr *a)
9078 {
9079     return op_smmla(s, a, false, true);
9080 }
9081
9082 static bool trans_SMMLSR(DisasContext *s, arg_rrrr *a)
9083 {
9084     return op_smmla(s, a, true, true);
9085 }
9086
9087 static bool op_div(DisasContext *s, arg_rrr *a, bool u)
9088 {
9089     TCGv_i32 t1, t2;
9090
9091     if (s->thumb
9092         ? !dc_isar_feature(aa32_thumb_div, s)
9093         : !dc_isar_feature(aa32_arm_div, s)) {
9094         return false;
9095     }
9096
9097     t1 = load_reg(s, a->rn);
9098     t2 = load_reg(s, a->rm);
9099     if (u) {
9100         gen_helper_udiv(t1, t1, t2);
9101     } else {
9102         gen_helper_sdiv(t1, t1, t2);
9103     }
9104     tcg_temp_free_i32(t2);
9105     store_reg(s, a->rd, t1);
9106     return true;
9107 }
9108
9109 static bool trans_SDIV(DisasContext *s, arg_rrr *a)
9110 {
9111     return op_div(s, a, false);
9112 }
9113
9114 static bool trans_UDIV(DisasContext *s, arg_rrr *a)
9115 {
9116     return op_div(s, a, true);
9117 }
9118
9119 /*
9120  * Block data transfer
9121  */
9122
9123 static TCGv_i32 op_addr_block_pre(DisasContext *s, arg_ldst_block *a, int n)
9124 {
9125     TCGv_i32 addr = load_reg(s, a->rn);
9126
9127     if (a->b) {
9128         if (a->i) {
9129             /* pre increment */
9130             tcg_gen_addi_i32(addr, addr, 4);
9131         } else {
9132             /* pre decrement */
9133             tcg_gen_addi_i32(addr, addr, -(n * 4));
9134         }
9135     } else if (!a->i && n != 1) {
9136         /* post decrement */
9137         tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9138     }
9139
9140     if (s->v8m_stackcheck && a->rn == 13 && a->w) {
9141         /*
9142          * If the writeback is incrementing SP rather than
9143          * decrementing it, and the initial SP is below the
9144          * stack limit but the final written-back SP would
9145          * be above, then then we must not perform any memory
9146          * accesses, but it is IMPDEF whether we generate
9147          * an exception. We choose to do so in this case.
9148          * At this point 'addr' is the lowest address, so
9149          * either the original SP (if incrementing) or our
9150          * final SP (if decrementing), so that's what we check.
9151          */
9152         gen_helper_v8m_stackcheck(cpu_env, addr);
9153     }
9154
9155     return addr;
9156 }
9157
9158 static void op_addr_block_post(DisasContext *s, arg_ldst_block *a,
9159                                TCGv_i32 addr, int n)
9160 {
9161     if (a->w) {
9162         /* write back */
9163         if (!a->b) {
9164             if (a->i) {
9165                 /* post increment */
9166                 tcg_gen_addi_i32(addr, addr, 4);
9167             } else {
9168                 /* post decrement */
9169                 tcg_gen_addi_i32(addr, addr, -(n * 4));
9170             }
9171         } else if (!a->i && n != 1) {
9172             /* pre decrement */
9173             tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
9174         }
9175         store_reg(s, a->rn, addr);
9176     } else {
9177         tcg_temp_free_i32(addr);
9178     }
9179 }
9180
9181 static bool op_stm(DisasContext *s, arg_ldst_block *a, int min_n)
9182 {
9183     int i, j, n, list, mem_idx;
9184     bool user = a->u;
9185     TCGv_i32 addr, tmp, tmp2;
9186
9187     if (user) {
9188         /* STM (user) */
9189         if (IS_USER(s)) {
9190             /* Only usable in supervisor mode.  */
9191             unallocated_encoding(s);
9192             return true;
9193         }
9194     }
9195
9196     list = a->list;
9197     n = ctpop16(list);
9198     if (n < min_n || a->rn == 15) {
9199         unallocated_encoding(s);
9200         return true;
9201     }
9202
9203     addr = op_addr_block_pre(s, a, n);
9204     mem_idx = get_mem_index(s);
9205
9206     for (i = j = 0; i < 16; i++) {
9207         if (!(list & (1 << i))) {
9208             continue;
9209         }
9210
9211         if (user && i != 15) {
9212             tmp = tcg_temp_new_i32();
9213             tmp2 = tcg_const_i32(i);
9214             gen_helper_get_user_reg(tmp, cpu_env, tmp2);
9215             tcg_temp_free_i32(tmp2);
9216         } else {
9217             tmp = load_reg(s, i);
9218         }
9219         gen_aa32_st32(s, tmp, addr, mem_idx);
9220         tcg_temp_free_i32(tmp);
9221
9222         /* No need to add after the last transfer.  */
9223         if (++j != n) {
9224             tcg_gen_addi_i32(addr, addr, 4);
9225         }
9226     }
9227
9228     op_addr_block_post(s, a, addr, n);
9229     return true;
9230 }
9231
9232 static bool trans_STM(DisasContext *s, arg_ldst_block *a)
9233 {
9234     /* BitCount(list) < 1 is UNPREDICTABLE */
9235     return op_stm(s, a, 1);
9236 }
9237
9238 static bool trans_STM_t32(DisasContext *s, arg_ldst_block *a)
9239 {
9240     /* Writeback register in register list is UNPREDICTABLE for T32.  */
9241     if (a->w && (a->list & (1 << a->rn))) {
9242         unallocated_encoding(s);
9243         return true;
9244     }
9245     /* BitCount(list) < 2 is UNPREDICTABLE */
9246     return op_stm(s, a, 2);
9247 }
9248
9249 static bool do_ldm(DisasContext *s, arg_ldst_block *a, int min_n)
9250 {
9251     int i, j, n, list, mem_idx;
9252     bool loaded_base;
9253     bool user = a->u;
9254     bool exc_return = false;
9255     TCGv_i32 addr, tmp, tmp2, loaded_var;
9256
9257     if (user) {
9258         /* LDM (user), LDM (exception return) */
9259         if (IS_USER(s)) {
9260             /* Only usable in supervisor mode.  */
9261             unallocated_encoding(s);
9262             return true;
9263         }
9264         if (extract32(a->list, 15, 1)) {
9265             exc_return = true;
9266             user = false;
9267         } else {
9268             /* LDM (user) does not allow writeback.  */
9269             if (a->w) {
9270                 unallocated_encoding(s);
9271                 return true;
9272             }
9273         }
9274     }
9275
9276     list = a->list;
9277     n = ctpop16(list);
9278     if (n < min_n || a->rn == 15) {
9279         unallocated_encoding(s);
9280         return true;
9281     }
9282
9283     addr = op_addr_block_pre(s, a, n);
9284     mem_idx = get_mem_index(s);
9285     loaded_base = false;
9286     loaded_var = NULL;
9287
9288     for (i = j = 0; i < 16; i++) {
9289         if (!(list & (1 << i))) {
9290             continue;
9291         }
9292
9293         tmp = tcg_temp_new_i32();
9294         gen_aa32_ld32u(s, tmp, addr, mem_idx);
9295         if (user) {
9296             tmp2 = tcg_const_i32(i);
9297             gen_helper_set_user_reg(cpu_env, tmp2, tmp);
9298             tcg_temp_free_i32(tmp2);
9299             tcg_temp_free_i32(tmp);
9300         } else if (i == a->rn) {
9301             loaded_var = tmp;
9302             loaded_base = true;
9303         } else if (i == 15 && exc_return) {
9304             store_pc_exc_ret(s, tmp);
9305         } else {
9306             store_reg_from_load(s, i, tmp);
9307         }
9308
9309         /* No need to add after the last transfer.  */
9310         if (++j != n) {
9311             tcg_gen_addi_i32(addr, addr, 4);
9312         }
9313     }
9314
9315     op_addr_block_post(s, a, addr, n);
9316
9317     if (loaded_base) {
9318         /* Note that we reject base == pc above.  */
9319         store_reg(s, a->rn, loaded_var);
9320     }
9321
9322     if (exc_return) {
9323         /* Restore CPSR from SPSR.  */
9324         tmp = load_cpu_field(spsr);
9325         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9326             gen_io_start();
9327         }
9328         gen_helper_cpsr_write_eret(cpu_env, tmp);
9329         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
9330             gen_io_end();
9331         }
9332         tcg_temp_free_i32(tmp);
9333         /* Must exit loop to check un-masked IRQs */
9334         s->base.is_jmp = DISAS_EXIT;
9335     }
9336     return true;
9337 }
9338
9339 static bool trans_LDM_a32(DisasContext *s, arg_ldst_block *a)
9340 {
9341     /*
9342      * Writeback register in register list is UNPREDICTABLE
9343      * for ArchVersion() >= 7.  Prior to v7, A32 would write
9344      * an UNKNOWN value to the base register.
9345      */
9346     if (ENABLE_ARCH_7 && a->w && (a->list & (1 << a->rn))) {
9347         unallocated_encoding(s);
9348         return true;
9349     }
9350     /* BitCount(list) < 1 is UNPREDICTABLE */
9351     return do_ldm(s, a, 1);
9352 }
9353
9354 static bool trans_LDM_t32(DisasContext *s, arg_ldst_block *a)
9355 {
9356     /* Writeback register in register list is UNPREDICTABLE for T32. */
9357     if (a->w && (a->list & (1 << a->rn))) {
9358         unallocated_encoding(s);
9359         return true;
9360     }
9361     /* BitCount(list) < 2 is UNPREDICTABLE */
9362     return do_ldm(s, a, 2);
9363 }
9364
9365 static bool trans_LDM_t16(DisasContext *s, arg_ldst_block *a)
9366 {
9367     /* Writeback is conditional on the base register not being loaded.  */
9368     a->w = !(a->list & (1 << a->rn));
9369     /* BitCount(list) < 1 is UNPREDICTABLE */
9370     return do_ldm(s, a, 1);
9371 }
9372
9373 /*
9374  * Branch, branch with link
9375  */
9376
9377 static bool trans_B(DisasContext *s, arg_i *a)
9378 {
9379     gen_jmp(s, read_pc(s) + a->imm);
9380     return true;
9381 }
9382
9383 static bool trans_B_cond_thumb(DisasContext *s, arg_ci *a)
9384 {
9385     /* This has cond from encoding, required to be outside IT block.  */
9386     if (a->cond >= 0xe) {
9387         return false;
9388     }
9389     if (s->condexec_mask) {
9390         unallocated_encoding(s);
9391         return true;
9392     }
9393     arm_skip_unless(s, a->cond);
9394     gen_jmp(s, read_pc(s) + a->imm);
9395     return true;
9396 }
9397
9398 static bool trans_BL(DisasContext *s, arg_i *a)
9399 {
9400     tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
9401     gen_jmp(s, read_pc(s) + a->imm);
9402     return true;
9403 }
9404
9405 static bool trans_BLX_i(DisasContext *s, arg_BLX_i *a)
9406 {
9407     TCGv_i32 tmp;
9408
9409     /* For A32, ARCH(5) is checked near the start of the uncond block. */
9410     if (s->thumb && (a->imm & 2)) {
9411         return false;
9412     }
9413     tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | s->thumb);
9414     tmp = tcg_const_i32(!s->thumb);
9415     store_cpu_field(tmp, thumb);
9416     gen_jmp(s, (read_pc(s) & ~3) + a->imm);
9417     return true;
9418 }
9419
9420 static bool trans_BL_BLX_prefix(DisasContext *s, arg_BL_BLX_prefix *a)
9421 {
9422     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
9423     tcg_gen_movi_i32(cpu_R[14], read_pc(s) + (a->imm << 12));
9424     return true;
9425 }
9426
9427 static bool trans_BL_suffix(DisasContext *s, arg_BL_suffix *a)
9428 {
9429     TCGv_i32 tmp = tcg_temp_new_i32();
9430
9431     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
9432     tcg_gen_addi_i32(tmp, cpu_R[14], (a->imm << 1) | 1);
9433     tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
9434     gen_bx(s, tmp);
9435     return true;
9436 }
9437
9438 static bool trans_BLX_suffix(DisasContext *s, arg_BLX_suffix *a)
9439 {
9440     TCGv_i32 tmp;
9441
9442     assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
9443     if (!ENABLE_ARCH_5) {
9444         return false;
9445     }
9446     tmp = tcg_temp_new_i32();
9447     tcg_gen_addi_i32(tmp, cpu_R[14], a->imm << 1);
9448     tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
9449     tcg_gen_movi_i32(cpu_R[14], s->base.pc_next | 1);
9450     gen_bx(s, tmp);
9451     return true;
9452 }
9453
9454 static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
9455 {
9456     TCGv_i32 addr, tmp;
9457
9458     tmp = load_reg(s, a->rm);
9459     if (half) {
9460         tcg_gen_add_i32(tmp, tmp, tmp);
9461     }
9462     addr = load_reg(s, a->rn);
9463     tcg_gen_add_i32(addr, addr, tmp);
9464
9465     gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
9466                     half ? MO_UW | s->be_data : MO_UB);
9467     tcg_temp_free_i32(addr);
9468
9469     tcg_gen_add_i32(tmp, tmp, tmp);
9470     tcg_gen_addi_i32(tmp, tmp, read_pc(s));
9471     store_reg(s, 15, tmp);
9472     return true;
9473 }
9474
9475 static bool trans_TBB(DisasContext *s, arg_tbranch *a)
9476 {
9477     return op_tbranch(s, a, false);
9478 }
9479
9480 static bool trans_TBH(DisasContext *s, arg_tbranch *a)
9481 {
9482     return op_tbranch(s, a, true);
9483 }
9484
9485 static bool trans_CBZ(DisasContext *s, arg_CBZ *a)
9486 {
9487     TCGv_i32 tmp = load_reg(s, a->rn);
9488
9489     arm_gen_condlabel(s);
9490     tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
9491                         tmp, 0, s->condlabel);
9492     tcg_temp_free_i32(tmp);
9493     gen_jmp(s, read_pc(s) + a->imm);
9494     return true;
9495 }
9496
9497 /*
9498  * Supervisor call - both T32 & A32 come here so we need to check
9499  * which mode we are in when checking for semihosting.
9500  */
9501
9502 static bool trans_SVC(DisasContext *s, arg_SVC *a)
9503 {
9504     const uint32_t semihost_imm = s->thumb ? 0xab : 0x123456;
9505
9506     if (!arm_dc_feature(s, ARM_FEATURE_M) && semihosting_enabled() &&
9507 #ifndef CONFIG_USER_ONLY
9508         !IS_USER(s) &&
9509 #endif
9510         (a->imm == semihost_imm)) {
9511         gen_exception_internal_insn(s, s->pc_curr, EXCP_SEMIHOST);
9512     } else {
9513         gen_set_pc_im(s, s->base.pc_next);
9514         s->svc_imm = a->imm;
9515         s->base.is_jmp = DISAS_SWI;
9516     }
9517     return true;
9518 }
9519
9520 /*
9521  * Unconditional system instructions
9522  */
9523
9524 static bool trans_RFE(DisasContext *s, arg_RFE *a)
9525 {
9526     static const int8_t pre_offset[4] = {
9527         /* DA */ -4, /* IA */ 0, /* DB */ -8, /* IB */ 4
9528     };
9529     static const int8_t post_offset[4] = {
9530         /* DA */ -8, /* IA */ 4, /* DB */ -4, /* IB */ 0
9531     };
9532     TCGv_i32 addr, t1, t2;
9533
9534     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
9535         return false;
9536     }
9537     if (IS_USER(s)) {
9538         unallocated_encoding(s);
9539         return true;
9540     }
9541
9542     addr = load_reg(s, a->rn);
9543     tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
9544
9545     /* Load PC into tmp and CPSR into tmp2.  */
9546     t1 = tcg_temp_new_i32();
9547     gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
9548     tcg_gen_addi_i32(addr, addr, 4);
9549     t2 = tcg_temp_new_i32();
9550     gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
9551
9552     if (a->w) {
9553         /* Base writeback.  */
9554         tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
9555         store_reg(s, a->rn, addr);
9556     } else {
9557         tcg_temp_free_i32(addr);
9558     }
9559     gen_rfe(s, t1, t2);
9560     return true;
9561 }
9562
9563 static bool trans_SRS(DisasContext *s, arg_SRS *a)
9564 {
9565     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
9566         return false;
9567     }
9568     gen_srs(s, a->mode, a->pu, a->w);
9569     return true;
9570 }
9571
9572 static bool trans_CPS(DisasContext *s, arg_CPS *a)
9573 {
9574     uint32_t mask, val;
9575
9576     if (!ENABLE_ARCH_6 || arm_dc_feature(s, ARM_FEATURE_M)) {
9577         return false;
9578     }
9579     if (IS_USER(s)) {
9580         /* Implemented as NOP in user mode.  */
9581         return true;
9582     }
9583     /* TODO: There are quite a lot of UNPREDICTABLE argument combinations. */
9584
9585     mask = val = 0;
9586     if (a->imod & 2) {
9587         if (a->A) {
9588             mask |= CPSR_A;
9589         }
9590         if (a->I) {
9591             mask |= CPSR_I;
9592         }
9593         if (a->F) {
9594             mask |= CPSR_F;
9595         }
9596         if (a->imod & 1) {
9597             val |= mask;
9598         }
9599     }
9600     if (a->M) {
9601         mask |= CPSR_M;
9602         val |= a->mode;
9603     }
9604     if (mask) {
9605         gen_set_psr_im(s, mask, 0, val);
9606     }
9607     return true;
9608 }
9609
9610 static bool trans_CPS_v7m(DisasContext *s, arg_CPS_v7m *a)
9611 {
9612     TCGv_i32 tmp, addr, el;
9613
9614     if (!arm_dc_feature(s, ARM_FEATURE_M)) {
9615         return false;
9616     }
9617     if (IS_USER(s)) {
9618         /* Implemented as NOP in user mode.  */
9619         return true;
9620     }
9621
9622     tmp = tcg_const_i32(a->im);
9623     /* FAULTMASK */
9624     if (a->F) {
9625         addr = tcg_const_i32(19);
9626         gen_helper_v7m_msr(cpu_env, addr, tmp);
9627         tcg_temp_free_i32(addr);
9628     }
9629     /* PRIMASK */
9630     if (a->I) {
9631         addr = tcg_const_i32(16);
9632         gen_helper_v7m_msr(cpu_env, addr, tmp);
9633         tcg_temp_free_i32(addr);
9634     }
9635     el = tcg_const_i32(s->current_el);
9636     gen_helper_rebuild_hflags_m32(cpu_env, el);
9637     tcg_temp_free_i32(el);
9638     tcg_temp_free_i32(tmp);
9639     gen_lookup_tb(s);
9640     return true;
9641 }
9642
9643 /*
9644  * Clear-Exclusive, Barriers
9645  */
9646
9647 static bool trans_CLREX(DisasContext *s, arg_CLREX *a)
9648 {
9649     if (s->thumb
9650         ? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
9651         : !ENABLE_ARCH_6K) {
9652         return false;
9653     }
9654     gen_clrex(s);
9655     return true;
9656 }
9657
9658 static bool trans_DSB(DisasContext *s, arg_DSB *a)
9659 {
9660     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
9661         return false;
9662     }
9663     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
9664     return true;
9665 }
9666
9667 static bool trans_DMB(DisasContext *s, arg_DMB *a)
9668 {
9669     return trans_DSB(s, NULL);
9670 }
9671
9672 static bool trans_ISB(DisasContext *s, arg_ISB *a)
9673 {
9674     if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
9675         return false;
9676     }
9677     /*
9678      * We need to break the TB after this insn to execute
9679      * self-modifying code correctly and also to take
9680      * any pending interrupts immediately.
9681      */
9682     gen_goto_tb(s, 0, s->base.pc_next);
9683     return true;
9684 }
9685
9686 static bool trans_SB(DisasContext *s, arg_SB *a)
9687 {
9688     if (!dc_isar_feature(aa32_sb, s)) {
9689         return false;
9690     }
9691     /*
9692      * TODO: There is no speculation barrier opcode
9693      * for TCG; MB and end the TB instead.
9694      */
9695     tcg_gen_mb(TCG_MO_ALL | TCG_BAR_SC);
9696     gen_goto_tb(s, 0, s->base.pc_next);
9697     return true;
9698 }
9699
9700 static bool trans_SETEND(DisasContext *s, arg_SETEND *a)
9701 {
9702     if (!ENABLE_ARCH_6) {
9703         return false;
9704     }
9705     if (a->E != (s->be_data == MO_BE)) {
9706         gen_helper_setend(cpu_env);
9707         s->base.is_jmp = DISAS_UPDATE;
9708     }
9709     return true;
9710 }
9711
9712 /*
9713  * Preload instructions
9714  * All are nops, contingent on the appropriate arch level.
9715  */
9716
9717 static bool trans_PLD(DisasContext *s, arg_PLD *a)
9718 {
9719     return ENABLE_ARCH_5TE;
9720 }
9721
9722 static bool trans_PLDW(DisasContext *s, arg_PLD *a)
9723 {
9724     return arm_dc_feature(s, ARM_FEATURE_V7MP);
9725 }
9726
9727 static bool trans_PLI(DisasContext *s, arg_PLD *a)
9728 {
9729     return ENABLE_ARCH_7;
9730 }
9731
9732 /*
9733  * If-then
9734  */
9735
9736 static bool trans_IT(DisasContext *s, arg_IT *a)
9737 {
9738     int cond_mask = a->cond_mask;
9739
9740     /*
9741      * No actual code generated for this insn, just setup state.
9742      *
9743      * Combinations of firstcond and mask which set up an 0b1111
9744      * condition are UNPREDICTABLE; we take the CONSTRAINED
9745      * UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
9746      * i.e. both meaning "execute always".
9747      */
9748     s->condexec_cond = (cond_mask >> 4) & 0xe;
9749     s->condexec_mask = cond_mask & 0x1f;
9750     return true;
9751 }
9752
9753 /*
9754  * Legacy decoder.
9755  */
9756
9757 static void disas_arm_insn(DisasContext *s, unsigned int insn)
9758 {
9759     unsigned int cond = insn >> 28;
9760
9761     /* M variants do not implement ARM mode; this must raise the INVSTATE
9762      * UsageFault exception.
9763      */
9764     if (arm_dc_feature(s, ARM_FEATURE_M)) {
9765         gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
9766                            default_exception_el(s));
9767         return;
9768     }
9769
9770     if (cond == 0xf) {
9771         /* In ARMv3 and v4 the NV condition is UNPREDICTABLE; we
9772          * choose to UNDEF. In ARMv5 and above the space is used
9773          * for miscellaneous unconditional instructions.
9774          */
9775         ARCH(5);
9776
9777         /* Unconditional instructions.  */
9778         /* TODO: Perhaps merge these into one decodetree output file.  */
9779         if (disas_a32_uncond(s, insn) ||
9780             disas_vfp_uncond(s, insn) ||
9781             disas_neon_dp(s, insn) ||
9782             disas_neon_ls(s, insn) ||
9783             disas_neon_shared(s, insn)) {
9784             return;
9785         }
9786         /* fall back to legacy decoder */
9787
9788         if (((insn >> 25) & 7) == 1) {
9789             /* NEON Data processing.  */
9790             if (disas_neon_data_insn(s, insn)) {
9791                 goto illegal_op;
9792             }
9793             return;
9794         }
9795         if ((insn & 0x0e000f00) == 0x0c000100) {
9796             if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
9797                 /* iWMMXt register transfer.  */
9798                 if (extract32(s->c15_cpar, 1, 1)) {
9799                     if (!disas_iwmmxt_insn(s, insn)) {
9800                         return;
9801                     }
9802                 }
9803             }
9804         }
9805         goto illegal_op;
9806     }
9807     if (cond != 0xe) {
9808         /* if not always execute, we generate a conditional jump to
9809            next instruction */
9810         arm_skip_unless(s, cond);
9811     }
9812
9813     /* TODO: Perhaps merge these into one decodetree output file.  */
9814     if (disas_a32(s, insn) ||
9815         disas_vfp(s, insn)) {
9816         return;
9817     }
9818     /* fall back to legacy decoder */
9819
9820     switch ((insn >> 24) & 0xf) {
9821     case 0xc:
9822     case 0xd:
9823     case 0xe:
9824         if (((insn >> 8) & 0xe) == 10) {
9825             /* VFP, but failed disas_vfp.  */
9826             goto illegal_op;
9827         }
9828         if (disas_coproc_insn(s, insn)) {
9829             /* Coprocessor.  */
9830             goto illegal_op;
9831         }
9832         break;
9833     default:
9834     illegal_op:
9835         unallocated_encoding(s);
9836         break;
9837     }
9838 }
9839
9840 static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
9841 {
9842     /*
9843      * Return true if this is a 16 bit instruction. We must be precise
9844      * about this (matching the decode).
9845      */
9846     if ((insn >> 11) < 0x1d) {
9847         /* Definitely a 16-bit instruction */
9848         return true;
9849     }
9850
9851     /* Top five bits 0b11101 / 0b11110 / 0b11111 : this is the
9852      * first half of a 32-bit Thumb insn. Thumb-1 cores might
9853      * end up actually treating this as two 16-bit insns, though,
9854      * if it's half of a bl/blx pair that might span a page boundary.
9855      */
9856     if (arm_dc_feature(s, ARM_FEATURE_THUMB2) ||
9857         arm_dc_feature(s, ARM_FEATURE_M)) {
9858         /* Thumb2 cores (including all M profile ones) always treat
9859          * 32-bit insns as 32-bit.
9860          */
9861         return false;
9862     }
9863
9864     if ((insn >> 11) == 0x1e && pc - s->page_start < TARGET_PAGE_SIZE - 3) {
9865         /* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix
9866          * is not on the next page; we merge this into a 32-bit
9867          * insn.
9868          */
9869         return false;
9870     }
9871     /* 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);
9872      * 0b1111_1xxx_xxxx_xxxx : BL suffix;
9873      * 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
9874      *  -- handle as single 16 bit insn
9875      */
9876     return true;
9877 }
9878
9879 /* Translate a 32-bit thumb instruction. */
9880 static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
9881 {
9882     /*
9883      * ARMv6-M supports a limited subset of Thumb2 instructions.
9884      * Other Thumb1 architectures allow only 32-bit
9885      * combined BL/BLX prefix and suffix.
9886      */
9887     if (arm_dc_feature(s, ARM_FEATURE_M) &&
9888         !arm_dc_feature(s, ARM_FEATURE_V7)) {
9889         int i;
9890         bool found = false;
9891         static const uint32_t armv6m_insn[] = {0xf3808000 /* msr */,
9892                                                0xf3b08040 /* dsb */,
9893                                                0xf3b08050 /* dmb */,
9894                                                0xf3b08060 /* isb */,
9895                                                0xf3e08000 /* mrs */,
9896                                                0xf000d000 /* bl */};
9897         static const uint32_t armv6m_mask[] = {0xffe0d000,
9898                                                0xfff0d0f0,
9899                                                0xfff0d0f0,
9900                                                0xfff0d0f0,
9901                                                0xffe0d000,
9902                                                0xf800d000};
9903
9904         for (i = 0; i < ARRAY_SIZE(armv6m_insn); i++) {
9905             if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
9906                 found = true;
9907                 break;
9908             }
9909         }
9910         if (!found) {
9911             goto illegal_op;
9912         }
9913     } else if ((insn & 0xf800e800) != 0xf000e800)  {
9914         ARCH(6T2);
9915     }
9916
9917     if ((insn & 0xef000000) == 0xef000000) {
9918         /*
9919          * T32 encodings 0b111p_1111_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9920          * transform into
9921          * A32 encodings 0b1111_001p_qqqq_qqqq_qqqq_qqqq_qqqq_qqqq
9922          */
9923         uint32_t a32_insn = (insn & 0xe2ffffff) |
9924             ((insn & (1 << 28)) >> 4) | (1 << 28);
9925
9926         if (disas_neon_dp(s, a32_insn)) {
9927             return;
9928         }
9929     }
9930
9931     if ((insn & 0xff100000) == 0xf9000000) {
9932         /*
9933          * T32 encodings 0b1111_1001_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9934          * transform into
9935          * A32 encodings 0b1111_0100_ppp0_qqqq_qqqq_qqqq_qqqq_qqqq
9936          */
9937         uint32_t a32_insn = (insn & 0x00ffffff) | 0xf4000000;
9938
9939         if (disas_neon_ls(s, a32_insn)) {
9940             return;
9941         }
9942     }
9943
9944     /*
9945      * TODO: Perhaps merge these into one decodetree output file.
9946      * Note disas_vfp is written for a32 with cond field in the
9947      * top nibble.  The t32 encoding requires 0xe in the top nibble.
9948      */
9949     if (disas_t32(s, insn) ||
9950         disas_vfp_uncond(s, insn) ||
9951         disas_neon_shared(s, insn) ||
9952         ((insn >> 28) == 0xe && disas_vfp(s, insn))) {
9953         return;
9954     }
9955     /* fall back to legacy decoder */
9956
9957     switch ((insn >> 25) & 0xf) {
9958     case 0: case 1: case 2: case 3:
9959         /* 16-bit instructions.  Should never happen.  */
9960         abort();
9961     case 6: case 7: case 14: case 15:
9962         /* Coprocessor.  */
9963         if (arm_dc_feature(s, ARM_FEATURE_M)) {
9964             /* 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx */
9965             if (extract32(insn, 24, 2) == 3) {
9966                 goto illegal_op; /* op0 = 0b11 : unallocated */
9967             }
9968
9969             if (((insn >> 8) & 0xe) == 10 &&
9970                 dc_isar_feature(aa32_fpsp_v2, s)) {
9971                 /* FP, and the CPU supports it */
9972                 goto illegal_op;
9973             } else {
9974                 /* All other insns: NOCP */
9975                 gen_exception_insn(s, s->pc_curr, EXCP_NOCP,
9976                                    syn_uncategorized(),
9977                                    default_exception_el(s));
9978             }
9979             break;
9980         }
9981         if (((insn >> 24) & 3) == 3) {
9982             /* Translate into the equivalent ARM encoding.  */
9983             insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4) | (1 << 28);
9984             if (disas_neon_data_insn(s, insn)) {
9985                 goto illegal_op;
9986             }
9987         } else if (((insn >> 8) & 0xe) == 10) {
9988             /* VFP, but failed disas_vfp.  */
9989             goto illegal_op;
9990         } else {
9991             if (insn & (1 << 28))
9992                 goto illegal_op;
9993             if (disas_coproc_insn(s, insn)) {
9994                 goto illegal_op;
9995             }
9996         }
9997         break;
9998     case 12:
9999         goto illegal_op;
10000     default:
10001     illegal_op:
10002         unallocated_encoding(s);
10003     }
10004 }
10005
10006 static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10007 {
10008     if (!disas_t16(s, insn)) {
10009         unallocated_encoding(s);
10010     }
10011 }
10012
10013 static bool insn_crosses_page(CPUARMState *env, DisasContext *s)
10014 {
10015     /* Return true if the insn at dc->base.pc_next might cross a page boundary.
10016      * (False positives are OK, false negatives are not.)
10017      * We know this is a Thumb insn, and our caller ensures we are
10018      * only called if dc->base.pc_next is less than 4 bytes from the page
10019      * boundary, so we cross the page if the first 16 bits indicate
10020      * that this is a 32 bit insn.
10021      */
10022     uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10023
10024     return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10025 }
10026
10027 static void arm_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
10028 {
10029     DisasContext *dc = container_of(dcbase, DisasContext, base);
10030     CPUARMState *env = cs->env_ptr;
10031     ARMCPU *cpu = env_archcpu(env);
10032     uint32_t tb_flags = dc->base.tb->flags;
10033     uint32_t condexec, core_mmu_idx;
10034
10035     dc->isar = &cpu->isar;
10036     dc->condjmp = 0;
10037
10038     dc->aarch64 = 0;
10039     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
10040      * there is no secure EL1, so we route exceptions to EL3.
10041      */
10042     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10043                                !arm_el_is_aa64(env, 3);
10044     dc->thumb = FIELD_EX32(tb_flags, TBFLAG_AM32, THUMB);
10045     dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10046     condexec = FIELD_EX32(tb_flags, TBFLAG_AM32, CONDEXEC);
10047     dc->condexec_mask = (condexec & 0xf) << 1;
10048     dc->condexec_cond = condexec >> 4;
10049
10050     core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10051     dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10052     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10053 #if !defined(CONFIG_USER_ONLY)
10054     dc->user = (dc->current_el == 0);
10055 #endif
10056     dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10057
10058     if (arm_feature(env, ARM_FEATURE_M)) {
10059         dc->vfp_enabled = 1;
10060         dc->be_data = MO_TE;
10061         dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_M32, HANDLER);
10062         dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10063             regime_is_secure(env, dc->mmu_idx);
10064         dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_M32, STACKCHECK);
10065         dc->v8m_fpccr_s_wrong =
10066             FIELD_EX32(tb_flags, TBFLAG_M32, FPCCR_S_WRONG);
10067         dc->v7m_new_fp_ctxt_needed =
10068             FIELD_EX32(tb_flags, TBFLAG_M32, NEW_FP_CTXT_NEEDED);
10069         dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_M32, LSPACT);
10070     } else {
10071         dc->be_data =
10072             FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10073         dc->debug_target_el =
10074             FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10075         dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10076         dc->hstr_active = FIELD_EX32(tb_flags, TBFLAG_A32, HSTR_ACTIVE);
10077         dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10078         dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10079         if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10080             dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10081         } else {
10082             dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10083             dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10084         }
10085     }
10086     dc->cp_regs = cpu->cp_regs;
10087     dc->features = env->features;
10088
10089     /* Single step state. The code-generation logic here is:
10090      *  SS_ACTIVE == 0:
10091      *   generate code with no special handling for single-stepping (except
10092      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10093      *   this happens anyway because those changes are all system register or
10094      *   PSTATE writes).
10095      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10096      *   emit code for one insn
10097      *   emit code to clear PSTATE.SS
10098      *   emit code to generate software step exception for completed step
10099      *   end TB (as usual for having generated an exception)
10100      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10101      *   emit code to generate a software step exception
10102      *   end the TB
10103      */
10104     dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
10105     dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
10106     dc->is_ldex = false;
10107
10108     dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
10109
10110     /* If architectural single step active, limit to 1.  */
10111     if (is_singlestepping(dc)) {
10112         dc->base.max_insns = 1;
10113     }
10114
10115     /* ARM is a fixed-length ISA.  Bound the number of insns to execute
10116        to those left on the page.  */
10117     if (!dc->thumb) {
10118         int bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
10119         dc->base.max_insns = MIN(dc->base.max_insns, bound);
10120     }
10121
10122     cpu_V0 = tcg_temp_new_i64();
10123     cpu_V1 = tcg_temp_new_i64();
10124     /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
10125     cpu_M0 = tcg_temp_new_i64();
10126 }
10127
10128 static void arm_tr_tb_start(DisasContextBase *dcbase, CPUState *cpu)
10129 {
10130     DisasContext *dc = container_of(dcbase, DisasContext, base);
10131
10132     /* A note on handling of the condexec (IT) bits:
10133      *
10134      * We want to avoid the overhead of having to write the updated condexec
10135      * bits back to the CPUARMState for every instruction in an IT block. So:
10136      * (1) if the condexec bits are not already zero then we write
10137      * zero back into the CPUARMState now. This avoids complications trying
10138      * to do it at the end of the block. (For example if we don't do this
10139      * it's hard to identify whether we can safely skip writing condexec
10140      * at the end of the TB, which we definitely want to do for the case
10141      * where a TB doesn't do anything with the IT state at all.)
10142      * (2) if we are going to leave the TB then we call gen_set_condexec()
10143      * which will write the correct value into CPUARMState if zero is wrong.
10144      * This is done both for leaving the TB at the end, and for leaving
10145      * it because of an exception we know will happen, which is done in
10146      * gen_exception_insn(). The latter is necessary because we need to
10147      * leave the TB with the PC/IT state just prior to execution of the
10148      * instruction which caused the exception.
10149      * (3) if we leave the TB unexpectedly (eg a data abort on a load)
10150      * then the CPUARMState will be wrong and we need to reset it.
10151      * This is handled in the same way as restoration of the
10152      * PC in these situations; we save the value of the condexec bits
10153      * for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
10154      * then uses this to restore them after an exception.
10155      *
10156      * Note that there are no instructions which can read the condexec
10157      * bits, and none which can write non-static values to them, so
10158      * we don't need to care about whether CPUARMState is correct in the
10159      * middle of a TB.
10160      */
10161
10162     /* Reset the conditional execution bits immediately. This avoids
10163        complications trying to do it at the end of the block.  */
10164     if (dc->condexec_mask || dc->condexec_cond) {
10165         TCGv_i32 tmp = tcg_temp_new_i32();
10166         tcg_gen_movi_i32(tmp, 0);
10167         store_cpu_field(tmp, condexec_bits);
10168     }
10169 }
10170
10171 static void arm_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
10172 {
10173     DisasContext *dc = container_of(dcbase, DisasContext, base);
10174
10175     tcg_gen_insn_start(dc->base.pc_next,
10176                        (dc->condexec_cond << 4) | (dc->condexec_mask >> 1),
10177                        0);
10178     dc->insn_start = tcg_last_op();
10179 }
10180
10181 static bool arm_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
10182                                     const CPUBreakpoint *bp)
10183 {
10184     DisasContext *dc = container_of(dcbase, DisasContext, base);
10185
10186     if (bp->flags & BP_CPU) {
10187         gen_set_condexec(dc);
10188         gen_set_pc_im(dc, dc->base.pc_next);
10189         gen_helper_check_breakpoints(cpu_env);
10190         /* End the TB early; it's likely not going to be executed */
10191         dc->base.is_jmp = DISAS_TOO_MANY;
10192     } else {
10193         gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
10194         /* The address covered by the breakpoint must be
10195            included in [tb->pc, tb->pc + tb->size) in order
10196            to for it to be properly cleared -- thus we
10197            increment the PC here so that the logic setting
10198            tb->size below does the right thing.  */
10199         /* TODO: Advance PC by correct instruction length to
10200          * avoid disassembler error messages */
10201         dc->base.pc_next += 2;
10202         dc->base.is_jmp = DISAS_NORETURN;
10203     }
10204
10205     return true;
10206 }
10207
10208 static bool arm_pre_translate_insn(DisasContext *dc)
10209 {
10210 #ifdef CONFIG_USER_ONLY
10211     /* Intercept jump to the magic kernel page.  */
10212     if (dc->base.pc_next >= 0xffff0000) {
10213         /* We always get here via a jump, so know we are not in a
10214            conditional execution block.  */
10215         gen_exception_internal(EXCP_KERNEL_TRAP);
10216         dc->base.is_jmp = DISAS_NORETURN;
10217         return true;
10218     }
10219 #endif
10220
10221     if (dc->ss_active && !dc->pstate_ss) {
10222         /* Singlestep state is Active-pending.
10223          * If we're in this state at the start of a TB then either
10224          *  a) we just took an exception to an EL which is being debugged
10225          *     and this is the first insn in the exception handler
10226          *  b) debug exceptions were masked and we just unmasked them
10227          *     without changing EL (eg by clearing PSTATE.D)
10228          * In either case we're going to take a swstep exception in the
10229          * "did not step an insn" case, and so the syndrome ISV and EX
10230          * bits should be zero.
10231          */
10232         assert(dc->base.num_insns == 1);
10233         gen_swstep_exception(dc, 0, 0);
10234         dc->base.is_jmp = DISAS_NORETURN;
10235         return true;
10236     }
10237
10238     return false;
10239 }
10240
10241 static void arm_post_translate_insn(DisasContext *dc)
10242 {
10243     if (dc->condjmp && !dc->base.is_jmp) {
10244         gen_set_label(dc->condlabel);
10245         dc->condjmp = 0;
10246     }
10247     translator_loop_temp_check(&dc->base);
10248 }
10249
10250 static void arm_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10251 {
10252     DisasContext *dc = container_of(dcbase, DisasContext, base);
10253     CPUARMState *env = cpu->env_ptr;
10254     unsigned int insn;
10255
10256     if (arm_pre_translate_insn(dc)) {
10257         return;
10258     }
10259
10260     dc->pc_curr = dc->base.pc_next;
10261     insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
10262     dc->insn = insn;
10263     dc->base.pc_next += 4;
10264     disas_arm_insn(dc, insn);
10265
10266     arm_post_translate_insn(dc);
10267
10268     /* ARM is a fixed-length ISA.  We performed the cross-page check
10269        in init_disas_context by adjusting max_insns.  */
10270 }
10271
10272 static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
10273 {
10274     /* Return true if this Thumb insn is always unconditional,
10275      * even inside an IT block. This is true of only a very few
10276      * instructions: BKPT, HLT, and SG.
10277      *
10278      * A larger class of instructions are UNPREDICTABLE if used
10279      * inside an IT block; we do not need to detect those here, because
10280      * what we do by default (perform the cc check and update the IT
10281      * bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
10282      * choice for those situations.
10283      *
10284      * insn is either a 16-bit or a 32-bit instruction; the two are
10285      * distinguishable because for the 16-bit case the top 16 bits
10286      * are zeroes, and that isn't a valid 32-bit encoding.
10287      */
10288     if ((insn & 0xffffff00) == 0xbe00) {
10289         /* BKPT */
10290         return true;
10291     }
10292
10293     if ((insn & 0xffffffc0) == 0xba80 && arm_dc_feature(s, ARM_FEATURE_V8) &&
10294         !arm_dc_feature(s, ARM_FEATURE_M)) {
10295         /* HLT: v8A only. This is unconditional even when it is going to
10296          * UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
10297          * For v7 cores this was a plain old undefined encoding and so
10298          * honours its cc check. (We might be using the encoding as
10299          * a semihosting trap, but we don't change the cc check behaviour
10300          * on that account, because a debugger connected to a real v7A
10301          * core and emulating semihosting traps by catching the UNDEF
10302          * exception would also only see cases where the cc check passed.
10303          * No guest code should be trying to do a HLT semihosting trap
10304          * in an IT block anyway.
10305          */
10306         return true;
10307     }
10308
10309     if (insn == 0xe97fe97f && arm_dc_feature(s, ARM_FEATURE_V8) &&
10310         arm_dc_feature(s, ARM_FEATURE_M)) {
10311         /* SG: v8M only */
10312         return true;
10313     }
10314
10315     return false;
10316 }
10317
10318 static void thumb_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
10319 {
10320     DisasContext *dc = container_of(dcbase, DisasContext, base);
10321     CPUARMState *env = cpu->env_ptr;
10322     uint32_t insn;
10323     bool is_16bit;
10324
10325     if (arm_pre_translate_insn(dc)) {
10326         return;
10327     }
10328
10329     dc->pc_curr = dc->base.pc_next;
10330     insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
10331     is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
10332     dc->base.pc_next += 2;
10333     if (!is_16bit) {
10334         uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
10335
10336         insn = insn << 16 | insn2;
10337         dc->base.pc_next += 2;
10338     }
10339     dc->insn = insn;
10340
10341     if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
10342         uint32_t cond = dc->condexec_cond;
10343
10344         /*
10345          * Conditionally skip the insn. Note that both 0xe and 0xf mean
10346          * "always"; 0xf is not "never".
10347          */
10348         if (cond < 0x0e) {
10349             arm_skip_unless(dc, cond);
10350         }
10351     }
10352
10353     if (is_16bit) {
10354         disas_thumb_insn(dc, insn);
10355     } else {
10356         disas_thumb2_insn(dc, insn);
10357     }
10358
10359     /* Advance the Thumb condexec condition.  */
10360     if (dc->condexec_mask) {
10361         dc->condexec_cond = ((dc->condexec_cond & 0xe) |
10362                              ((dc->condexec_mask >> 4) & 1));
10363         dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
10364         if (dc->condexec_mask == 0) {
10365             dc->condexec_cond = 0;
10366         }
10367     }
10368
10369     arm_post_translate_insn(dc);
10370
10371     /* Thumb is a variable-length ISA.  Stop translation when the next insn
10372      * will touch a new page.  This ensures that prefetch aborts occur at
10373      * the right place.
10374      *
10375      * We want to stop the TB if the next insn starts in a new page,
10376      * or if it spans between this page and the next. This means that
10377      * if we're looking at the last halfword in the page we need to
10378      * see if it's a 16-bit Thumb insn (which will fit in this TB)
10379      * or a 32-bit Thumb insn (which won't).
10380      * This is to avoid generating a silly TB with a single 16-bit insn
10381      * in it at the end of this page (which would execute correctly
10382      * but isn't very efficient).
10383      */
10384     if (dc->base.is_jmp == DISAS_NEXT
10385         && (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
10386             || (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - 3
10387                 && insn_crosses_page(env, dc)))) {
10388         dc->base.is_jmp = DISAS_TOO_MANY;
10389     }
10390 }
10391
10392 static void arm_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
10393 {
10394     DisasContext *dc = container_of(dcbase, DisasContext, base);
10395
10396     if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
10397         /* FIXME: This can theoretically happen with self-modifying code. */
10398         cpu_abort(cpu, "IO on conditional branch instruction");
10399     }
10400
10401     /* At this stage dc->condjmp will only be set when the skipped
10402        instruction was a conditional branch or trap, and the PC has
10403        already been written.  */
10404     gen_set_condexec(dc);
10405     if (dc->base.is_jmp == DISAS_BX_EXCRET) {
10406         /* Exception return branches need some special case code at the
10407          * end of the TB, which is complex enough that it has to
10408          * handle the single-step vs not and the condition-failed
10409          * insn codepath itself.
10410          */
10411         gen_bx_excret_final_code(dc);
10412     } else if (unlikely(is_singlestepping(dc))) {
10413         /* Unconditional and "condition passed" instruction codepath. */
10414         switch (dc->base.is_jmp) {
10415         case DISAS_SWI:
10416             gen_ss_advance(dc);
10417             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
10418                           default_exception_el(dc));
10419             break;
10420         case DISAS_HVC:
10421             gen_ss_advance(dc);
10422             gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
10423             break;
10424         case DISAS_SMC:
10425             gen_ss_advance(dc);
10426             gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
10427             break;
10428         case DISAS_NEXT:
10429         case DISAS_TOO_MANY:
10430         case DISAS_UPDATE:
10431             gen_set_pc_im(dc, dc->base.pc_next);
10432             /* fall through */
10433         default:
10434             /* FIXME: Single stepping a WFI insn will not halt the CPU. */
10435             gen_singlestep_exception(dc);
10436             break;
10437         case DISAS_NORETURN:
10438             break;
10439         }
10440     } else {
10441         /* While branches must always occur at the end of an IT block,
10442            there are a few other things that can cause us to terminate
10443            the TB in the middle of an IT block:
10444             - Exception generating instructions (bkpt, swi, undefined).
10445             - Page boundaries.
10446             - Hardware watchpoints.
10447            Hardware breakpoints have already been handled and skip this code.
10448          */
10449         switch(dc->base.is_jmp) {
10450         case DISAS_NEXT:
10451         case DISAS_TOO_MANY:
10452             gen_goto_tb(dc, 1, dc->base.pc_next);
10453             break;
10454         case DISAS_JUMP:
10455             gen_goto_ptr();
10456             break;
10457         case DISAS_UPDATE:
10458             gen_set_pc_im(dc, dc->base.pc_next);
10459             /* fall through */
10460         default:
10461             /* indicate that the hash table must be used to find the next TB */
10462             tcg_gen_exit_tb(NULL, 0);
10463             break;
10464         case DISAS_NORETURN:
10465             /* nothing more to generate */
10466             break;
10467         case DISAS_WFI:
10468         {
10469             TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
10470                                           !(dc->insn & (1U << 31))) ? 2 : 4);
10471
10472             gen_helper_wfi(cpu_env, tmp);
10473             tcg_temp_free_i32(tmp);
10474             /* The helper doesn't necessarily throw an exception, but we
10475              * must go back to the main loop to check for interrupts anyway.
10476              */
10477             tcg_gen_exit_tb(NULL, 0);
10478             break;
10479         }
10480         case DISAS_WFE:
10481             gen_helper_wfe(cpu_env);
10482             break;
10483         case DISAS_YIELD:
10484             gen_helper_yield(cpu_env);
10485             break;
10486         case DISAS_SWI:
10487             gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
10488                           default_exception_el(dc));
10489             break;
10490         case DISAS_HVC:
10491             gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), 2);
10492             break;
10493         case DISAS_SMC:
10494             gen_exception(EXCP_SMC, syn_aa32_smc(), 3);
10495             break;
10496         }
10497     }
10498
10499     if (dc->condjmp) {
10500         /* "Condition failed" instruction codepath for the branch/trap insn */
10501         gen_set_label(dc->condlabel);
10502         gen_set_condexec(dc);
10503         if (unlikely(is_singlestepping(dc))) {
10504             gen_set_pc_im(dc, dc->base.pc_next);
10505             gen_singlestep_exception(dc);
10506         } else {
10507             gen_goto_tb(dc, 1, dc->base.pc_next);
10508         }
10509     }
10510 }
10511
10512 static void arm_tr_disas_log(const DisasContextBase *dcbase, CPUState *cpu)
10513 {
10514     DisasContext *dc = container_of(dcbase, DisasContext, base);
10515
10516     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
10517     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
10518 }
10519
10520 static const TranslatorOps arm_translator_ops = {
10521     .init_disas_context = arm_tr_init_disas_context,
10522     .tb_start           = arm_tr_tb_start,
10523     .insn_start         = arm_tr_insn_start,
10524     .breakpoint_check   = arm_tr_breakpoint_check,
10525     .translate_insn     = arm_tr_translate_insn,
10526     .tb_stop            = arm_tr_tb_stop,
10527     .disas_log          = arm_tr_disas_log,
10528 };
10529
10530 static const TranslatorOps thumb_translator_ops = {
10531     .init_disas_context = arm_tr_init_disas_context,
10532     .tb_start           = arm_tr_tb_start,
10533     .insn_start         = arm_tr_insn_start,
10534     .breakpoint_check   = arm_tr_breakpoint_check,
10535     .translate_insn     = thumb_tr_translate_insn,
10536     .tb_stop            = arm_tr_tb_stop,
10537     .disas_log          = arm_tr_disas_log,
10538 };
10539
10540 /* generate intermediate code for basic block 'tb'.  */
10541 void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int max_insns)
10542 {
10543     DisasContext dc = { };
10544     const TranslatorOps *ops = &arm_translator_ops;
10545
10546     if (FIELD_EX32(tb->flags, TBFLAG_AM32, THUMB)) {
10547         ops = &thumb_translator_ops;
10548     }
10549 #ifdef TARGET_AARCH64
10550     if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
10551         ops = &aarch64_translator_ops;
10552     }
10553 #endif
10554
10555     translator_loop(ops, &dc.base, cpu, tb, max_insns);
10556 }
10557
10558 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb,
10559                           target_ulong *data)
10560 {
10561     if (is_a64(env)) {
10562         env->pc = data[0];
10563         env->condexec_bits = 0;
10564         env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
10565     } else {
10566         env->regs[15] = data[0];
10567         env->condexec_bits = data[1];
10568         env->exception.syndrome = data[2] << ARM_INSN_START_WORD2_SHIFT;
10569     }
10570 }