target-arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "qemu/log.h"
  25 #include "arm_ldst.h"
  26 #include "translate.h"
  27 #include "internals.h"
  28 #include "qemu/host-utils.h"
  29
  30 #include "exec/semihost.h"
  31 #include "exec/gen-icount.h"
  32
  33 #include "exec/helper-proto.h"
  34 #include "exec/helper-gen.h"
  35 #include "exec/log.h"
  36
  37 #include "trace-tcg.h"
  38
  39 static TCGv_i64 cpu_X[32];
  40 static TCGv_i64 cpu_pc;
  41
  42 /* Load/store exclusive handling */
  43 static TCGv_i64 cpu_exclusive_high;
  44
  45 static const char *regnames[] = {
  46     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  47     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  48     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  49     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  50 };
  51
  52 enum a64_shift_type {
  53     A64_SHIFT_TYPE_LSL = 0,
  54     A64_SHIFT_TYPE_LSR = 1,
  55     A64_SHIFT_TYPE_ASR = 2,
  56     A64_SHIFT_TYPE_ROR = 3
  57 };
  58
  59 /* Table based decoder typedefs - used when the relevant bits for decode
  60  * are too awkwardly scattered across the instruction (eg SIMD).
  61  */
  62 typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);
  63
  64 typedef struct AArch64DecodeTable {
  65     uint32_t pattern;
  66     uint32_t mask;
  67     AArch64DecodeFn *disas_fn;
  68 } AArch64DecodeTable;
  69
  70 /* Function prototype for gen_ functions for calling Neon helpers */
  71 typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
  72 typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
  73 typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
  74 typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
  75 typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
  76 typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
  77 typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
  78 typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
  79 typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
  80 typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
  81 typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
  82 typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
  83 typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
  84
  85 /* initialize TCG globals.  */
  86 void a64_translate_init(void)
  87 {
  88     int i;
  89
  90     cpu_pc = tcg_global_mem_new_i64(cpu_env,
  91                                     offsetof(CPUARMState, pc),
  92                                     "pc");
  93     for (i = 0; i < 32; i++) {
  94         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
  95                                           offsetof(CPUARMState, xregs[i]),
  96                                           regnames[i]);
  97     }
  98
  99     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
 100         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 101 }
 102
 103 static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
 104 {
 105     /* Return the mmu_idx to use for A64 "unprivileged load/store" insns:
 106      *  if EL1, access as if EL0; otherwise access at current EL
 107      */
 108     switch (s->mmu_idx) {
 109     case ARMMMUIdx_S12NSE1:
 110         return ARMMMUIdx_S12NSE0;
 111     case ARMMMUIdx_S1SE1:
 112         return ARMMMUIdx_S1SE0;
 113     case ARMMMUIdx_S2NS:
 114         g_assert_not_reached();
 115     default:
 116         return s->mmu_idx;
 117     }
 118 }
 119
 120 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 121                             fprintf_function cpu_fprintf, int flags)
 122 {
 123     ARMCPU *cpu = ARM_CPU(cs);
 124     CPUARMState *env = &cpu->env;
 125     uint32_t psr = pstate_read(env);
 126     int i;
 127     int el = arm_current_el(env);
 128     const char *ns_status;
 129
 130     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 131             env->pc, env->xregs[31]);
 132     for (i = 0; i < 31; i++) {
 133         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 134         if ((i % 4) == 3) {
 135             cpu_fprintf(f, "\n");
 136         } else {
 137             cpu_fprintf(f, " ");
 138         }
 139     }
 140
 141     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 142         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 143     } else {
 144         ns_status = "";
 145     }
 146
 147     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 148                 psr,
 149                 psr & PSTATE_N ? 'N' : '-',
 150                 psr & PSTATE_Z ? 'Z' : '-',
 151                 psr & PSTATE_C ? 'C' : '-',
 152                 psr & PSTATE_V ? 'V' : '-',
 153                 ns_status,
 154                 el,
 155                 psr & PSTATE_SP ? 'h' : 't');
 156
 157     if (flags & CPU_DUMP_FPU) {
 158         int numvfpregs = 32;
 159         for (i = 0; i < numvfpregs; i += 2) {
 160             uint64_t vlo = float64_val(env->vfp.regs[i * 2]);
 161             uint64_t vhi = float64_val(env->vfp.regs[(i * 2) + 1]);
 162             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 " ",
 163                         i, vhi, vlo);
 164             vlo = float64_val(env->vfp.regs[(i + 1) * 2]);
 165             vhi = float64_val(env->vfp.regs[((i + 1) * 2) + 1]);
 166             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "\n",
 167                         i + 1, vhi, vlo);
 168         }
 169         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 170                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 171     }
 172 }
 173
 174 void gen_a64_set_pc_im(uint64_t val)
 175 {
 176     tcg_gen_movi_i64(cpu_pc, val);
 177 }
 178
 179 typedef struct DisasCompare64 {
 180     TCGCond cond;
 181     TCGv_i64 value;
 182 } DisasCompare64;
 183
 184 static void a64_test_cc(DisasCompare64 *c64, int cc)
 185 {
 186     DisasCompare c32;
 187
 188     arm_test_cc(&c32, cc);
 189
 190     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 191        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 192     c64->cond = c32.cond;
 193     c64->value = tcg_temp_new_i64();
 194     tcg_gen_ext_i32_i64(c64->value, c32.value);
 195
 196     arm_free_cc(&c32);
 197 }
 198
 199 static void a64_free_cc(DisasCompare64 *c64)
 200 {
 201     tcg_temp_free_i64(c64->value);
 202 }
 203
 204 static void gen_exception_internal(int excp)
 205 {
 206     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 207
 208     assert(excp_is_internal(excp));
 209     gen_helper_exception_internal(cpu_env, tcg_excp);
 210     tcg_temp_free_i32(tcg_excp);
 211 }
 212
 213 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 214 {
 215     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 216     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 217     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 218
 219     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 220                                        tcg_syn, tcg_el);
 221     tcg_temp_free_i32(tcg_el);
 222     tcg_temp_free_i32(tcg_syn);
 223     tcg_temp_free_i32(tcg_excp);
 224 }
 225
 226 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 227 {
 228     gen_a64_set_pc_im(s->pc - offset);
 229     gen_exception_internal(excp);
 230     s->is_jmp = DISAS_EXC;
 231 }
 232
 233 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 234                                uint32_t syndrome, uint32_t target_el)
 235 {
 236     gen_a64_set_pc_im(s->pc - offset);
 237     gen_exception(excp, syndrome, target_el);
 238     s->is_jmp = DISAS_EXC;
 239 }
 240
 241 static void gen_ss_advance(DisasContext *s)
 242 {
 243     /* If the singlestep state is Active-not-pending, advance to
 244      * Active-pending.
 245      */
 246     if (s->ss_active) {
 247         s->pstate_ss = 0;
 248         gen_helper_clear_pstate_ss(cpu_env);
 249     }
 250 }
 251
 252 static void gen_step_complete_exception(DisasContext *s)
 253 {
 254     /* We just completed step of an insn. Move from Active-not-pending
 255      * to Active-pending, and then also take the swstep exception.
 256      * This corresponds to making the (IMPDEF) choice to prioritize
 257      * swstep exceptions over asynchronous exceptions taken to an exception
 258      * level where debug is disabled. This choice has the advantage that
 259      * we do not need to maintain internal state corresponding to the
 260      * ISV/EX syndrome bits between completion of the step and generation
 261      * of the exception, and our syndrome information is always correct.
 262      */
 263     gen_ss_advance(s);
 264     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 265                   default_exception_el(s));
 266     s->is_jmp = DISAS_EXC;
 267 }
 268
 269 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 270 {
 271     /* No direct tb linking with singlestep (either QEMU's or the ARM
 272      * debug architecture kind) or deterministic io
 273      */
 274     if (s->singlestep_enabled || s->ss_active || (s->tb->cflags & CF_LAST_IO)) {
 275         return false;
 276     }
 277
 278 #ifndef CONFIG_USER_ONLY
 279     /* Only link tbs from inside the same guest page */
 280     if ((s->tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 281         return false;
 282     }
 283 #endif
 284
 285     return true;
 286 }
 287
 288 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 289 {
 290     TranslationBlock *tb;
 291
 292     tb = s->tb;
 293     if (use_goto_tb(s, n, dest)) {
 294         tcg_gen_goto_tb(n);
 295         gen_a64_set_pc_im(dest);
 296         tcg_gen_exit_tb((intptr_t)tb + n);
 297         s->is_jmp = DISAS_TB_JUMP;
 298     } else {
 299         gen_a64_set_pc_im(dest);
 300         if (s->ss_active) {
 301             gen_step_complete_exception(s);
 302         } else if (s->singlestep_enabled) {
 303             gen_exception_internal(EXCP_DEBUG);
 304         } else {
 305             tcg_gen_exit_tb(0);
 306             s->is_jmp = DISAS_TB_JUMP;
 307         }
 308     }
 309 }
 310
 311 static void disas_set_insn_syndrome(DisasContext *s, uint32_t syn)
 312 {
 313     /* We don't need to save all of the syndrome so we mask and shift
 314      * out uneeded bits to help the sleb128 encoder do a better job.
 315      */
 316     syn &= ARM_INSN_START_WORD2_MASK;
 317     syn >>= ARM_INSN_START_WORD2_SHIFT;
 318
 319     /* We check and clear insn_start_idx to catch multiple updates.  */
 320     assert(s->insn_start_idx != 0);
 321     tcg_set_insn_param(s->insn_start_idx, 2, syn);
 322     s->insn_start_idx = 0;
 323 }
 324
 325 static void unallocated_encoding(DisasContext *s)
 326 {
 327     /* Unallocated and reserved encodings are uncategorized */
 328     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
 329                        default_exception_el(s));
 330 }
 331
 332 #define unsupported_encoding(s, insn)                                    \
 333     do {                                                                 \
 334         qemu_log_mask(LOG_UNIMP,                                         \
 335                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 336                       "at pc=%016" PRIx64 "\n",                          \
 337                       __FILE__, __LINE__, insn, s->pc - 4);              \
 338         unallocated_encoding(s);                                         \
 339     } while (0);
 340
 341 static void init_tmp_a64_array(DisasContext *s)
 342 {
 343 #ifdef CONFIG_DEBUG_TCG
 344     int i;
 345     for (i = 0; i < ARRAY_SIZE(s->tmp_a64); i++) {
 346         TCGV_UNUSED_I64(s->tmp_a64[i]);
 347     }
 348 #endif
 349     s->tmp_a64_count = 0;
 350 }
 351
 352 static void free_tmp_a64(DisasContext *s)
 353 {
 354     int i;
 355     for (i = 0; i < s->tmp_a64_count; i++) {
 356         tcg_temp_free_i64(s->tmp_a64[i]);
 357     }
 358     init_tmp_a64_array(s);
 359 }
 360
 361 static TCGv_i64 new_tmp_a64(DisasContext *s)
 362 {
 363     assert(s->tmp_a64_count < TMP_A64_MAX);
 364     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 365 }
 366
 367 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 368 {
 369     TCGv_i64 t = new_tmp_a64(s);
 370     tcg_gen_movi_i64(t, 0);
 371     return t;
 372 }
 373
/*
 * Register access functions
 *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 *
 * B1.2.1 Register mappings
 *
 * In instruction register encoding 31 can refer to ZR (zero register) or
 * the SP (stack pointer) depending on context. In QEMU's case we map SP
 * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 * This is the point of the _sp forms.
 */
 389 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 390 {
 391     if (reg == 31) {
 392         return new_tmp_a64_zero(s);
 393     } else {
 394         return cpu_X[reg];
 395     }
 396 }
 397
 398 /* register access for when 31 == SP */
 399 static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
 400 {
 401     return cpu_X[reg];
 402 }
 403
 404 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 405  * representing the register contents. This TCGv is an auto-freed
 406  * temporary so it need not be explicitly freed, and may be modified.
 407  */
 408 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 409 {
 410     TCGv_i64 v = new_tmp_a64(s);
 411     if (reg != 31) {
 412         if (sf) {
 413             tcg_gen_mov_i64(v, cpu_X[reg]);
 414         } else {
 415             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 416         }
 417     } else {
 418         tcg_gen_movi_i64(v, 0);
 419     }
 420     return v;
 421 }
 422
 423 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 424 {
 425     TCGv_i64 v = new_tmp_a64(s);
 426     if (sf) {
 427         tcg_gen_mov_i64(v, cpu_X[reg]);
 428     } else {
 429         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 430     }
 431     return v;
 432 }
 433
 434 /* We should have at some point before trying to access an FP register
 435  * done the necessary access check, so assert that
 436  * (a) we did the check and
 437  * (b) we didn't then just plough ahead anyway if it failed.
 438  * Print the instruction pattern in the abort message so we can figure
 439  * out what we need to fix if a user encounters this problem in the wild.
 440  */
 441 static inline void assert_fp_access_checked(DisasContext *s)
 442 {
 443 #ifdef CONFIG_DEBUG_TCG
 444     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 445         fprintf(stderr, "target-arm: FP access check missing for "
 446                 "instruction 0x%08x\n", s->insn);
 447         abort();
 448     }
 449 #endif
 450 }
 451
 452 /* Return the offset into CPUARMState of an element of specified
 453  * size, 'element' places in from the least significant end of
 454  * the FP/vector register Qn.
 455  */
 456 static inline int vec_reg_offset(DisasContext *s, int regno,
 457                                  int element, TCGMemOp size)
 458 {
 459     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 460 #ifdef HOST_WORDS_BIGENDIAN
 461     /* This is complicated slightly because vfp.regs[2n] is
 462      * still the low half and  vfp.regs[2n+1] the high half
 463      * of the 128 bit vector, even on big endian systems.
 464      * Calculate the offset assuming a fully bigendian 128 bits,
 465      * then XOR to account for the order of the two 64 bit halves.
 466      */
 467     offs += (16 - ((element + 1) * (1 << size)));
 468     offs ^= 8;
 469 #else
 470     offs += element * (1 << size);
 471 #endif
 472     assert_fp_access_checked(s);
 473     return offs;
 474 }
 475
 476 /* Return the offset into CPUARMState of a slice (from
 477  * the least significant end) of FP register Qn (ie
 478  * Dn, Sn, Hn or Bn).
 479  * (Note that this is not the same mapping as for A32; see cpu.h)
 480  */
 481 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 482 {
 483     int offs = offsetof(CPUARMState, vfp.regs[regno * 2]);
 484 #ifdef HOST_WORDS_BIGENDIAN
 485     offs += (8 - (1 << size));
 486 #endif
 487     assert_fp_access_checked(s);
 488     return offs;
 489 }
 490
 491 /* Offset of the high half of the 128 bit vector Qn */
 492 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 493 {
 494     assert_fp_access_checked(s);
 495     return offsetof(CPUARMState, vfp.regs[regno * 2 + 1]);
 496 }
 497
 498 /* Convenience accessors for reading and writing single and double
 499  * FP registers. Writing clears the upper parts of the associated
 500  * 128 bit vector register, as required by the architecture.
 501  * Note that unlike the GP register accessors, the values returned
 502  * by the read functions must be manually freed.
 503  */
 504 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 505 {
 506     TCGv_i64 v = tcg_temp_new_i64();
 507
 508     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 509     return v;
 510 }
 511
 512 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 513 {
 514     TCGv_i32 v = tcg_temp_new_i32();
 515
 516     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 517     return v;
 518 }
 519
 520 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 521 {
 522     TCGv_i64 tcg_zero = tcg_const_i64(0);
 523
 524     tcg_gen_st_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 525     tcg_gen_st_i64(tcg_zero, cpu_env, fp_reg_hi_offset(s, reg));
 526     tcg_temp_free_i64(tcg_zero);
 527 }
 528
 529 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 530 {
 531     TCGv_i64 tmp = tcg_temp_new_i64();
 532
 533     tcg_gen_extu_i32_i64(tmp, v);
 534     write_fp_dreg(s, reg, tmp);
 535     tcg_temp_free_i64(tmp);
 536 }
 537
 538 static TCGv_ptr get_fpstatus_ptr(void)
 539 {
 540     TCGv_ptr statusptr = tcg_temp_new_ptr();
 541     int offset;
 542
 543     /* In A64 all instructions (both FP and Neon) use the FPCR;
 544      * there is no equivalent of the A32 Neon "standard FPSCR value"
 545      * and all operations use vfp.fp_status.
 546      */
 547     offset = offsetof(CPUARMState, vfp.fp_status);
 548     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 549     return statusptr;
 550 }
 551
 552 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 553  * than the 32 bit equivalent.
 554  */
 555 static inline void gen_set_NZ64(TCGv_i64 result)
 556 {
 557     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
 558     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 559 }
 560
 561 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 562 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 563 {
 564     if (sf) {
 565         gen_set_NZ64(result);
 566     } else {
 567         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 568         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 569     }
 570     tcg_gen_movi_i32(cpu_CF, 0);
 571     tcg_gen_movi_i32(cpu_VF, 0);
 572 }
 573
 574 /* dest = T0 + T1; compute C, N, V and Z flags */
 575 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 576 {
 577     if (sf) {
 578         TCGv_i64 result, flag, tmp;
 579         result = tcg_temp_new_i64();
 580         flag = tcg_temp_new_i64();
 581         tmp = tcg_temp_new_i64();
 582
 583         tcg_gen_movi_i64(tmp, 0);
 584         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 585
 586         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 587
 588         gen_set_NZ64(result);
 589
 590         tcg_gen_xor_i64(flag, result, t0);
 591         tcg_gen_xor_i64(tmp, t0, t1);
 592         tcg_gen_andc_i64(flag, flag, tmp);
 593         tcg_temp_free_i64(tmp);
 594         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 595
 596         tcg_gen_mov_i64(dest, result);
 597         tcg_temp_free_i64(result);
 598         tcg_temp_free_i64(flag);
 599     } else {
 600         /* 32 bit arithmetic */
 601         TCGv_i32 t0_32 = tcg_temp_new_i32();
 602         TCGv_i32 t1_32 = tcg_temp_new_i32();
 603         TCGv_i32 tmp = tcg_temp_new_i32();
 604
 605         tcg_gen_movi_i32(tmp, 0);
 606         tcg_gen_extrl_i64_i32(t0_32, t0);
 607         tcg_gen_extrl_i64_i32(t1_32, t1);
 608         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 609         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 610         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 611         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 612         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 613         tcg_gen_extu_i32_i64(dest, cpu_NF);
 614
 615         tcg_temp_free_i32(tmp);
 616         tcg_temp_free_i32(t0_32);
 617         tcg_temp_free_i32(t1_32);
 618     }
 619 }
 620
 621 /* dest = T0 - T1; compute C, N, V and Z flags */
 622 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 623 {
 624     if (sf) {
 625         /* 64 bit arithmetic */
 626         TCGv_i64 result, flag, tmp;
 627
 628         result = tcg_temp_new_i64();
 629         flag = tcg_temp_new_i64();
 630         tcg_gen_sub_i64(result, t0, t1);
 631
 632         gen_set_NZ64(result);
 633
 634         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 635         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 636
 637         tcg_gen_xor_i64(flag, result, t0);
 638         tmp = tcg_temp_new_i64();
 639         tcg_gen_xor_i64(tmp, t0, t1);
 640         tcg_gen_and_i64(flag, flag, tmp);
 641         tcg_temp_free_i64(tmp);
 642         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 643         tcg_gen_mov_i64(dest, result);
 644         tcg_temp_free_i64(flag);
 645         tcg_temp_free_i64(result);
 646     } else {
 647         /* 32 bit arithmetic */
 648         TCGv_i32 t0_32 = tcg_temp_new_i32();
 649         TCGv_i32 t1_32 = tcg_temp_new_i32();
 650         TCGv_i32 tmp;
 651
 652         tcg_gen_extrl_i64_i32(t0_32, t0);
 653         tcg_gen_extrl_i64_i32(t1_32, t1);
 654         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 655         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 656         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 657         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 658         tmp = tcg_temp_new_i32();
 659         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 660         tcg_temp_free_i32(t0_32);
 661         tcg_temp_free_i32(t1_32);
 662         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 663         tcg_temp_free_i32(tmp);
 664         tcg_gen_extu_i32_i64(dest, cpu_NF);
 665     }
 666 }
 667
 668 /* dest = T0 + T1 + CF; do not compute flags. */
 669 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 670 {
 671     TCGv_i64 flag = tcg_temp_new_i64();
 672     tcg_gen_extu_i32_i64(flag, cpu_CF);
 673     tcg_gen_add_i64(dest, t0, t1);
 674     tcg_gen_add_i64(dest, dest, flag);
 675     tcg_temp_free_i64(flag);
 676
 677     if (!sf) {
 678         tcg_gen_ext32u_i64(dest, dest);
 679     }
 680 }
 681
 682 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 683 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 684 {
 685     if (sf) {
 686         TCGv_i64 result, cf_64, vf_64, tmp;
 687         result = tcg_temp_new_i64();
 688         cf_64 = tcg_temp_new_i64();
 689         vf_64 = tcg_temp_new_i64();
 690         tmp = tcg_const_i64(0);
 691
 692         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 693         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 694         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 695         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 696         gen_set_NZ64(result);
 697
 698         tcg_gen_xor_i64(vf_64, result, t0);
 699         tcg_gen_xor_i64(tmp, t0, t1);
 700         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 701         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 702
 703         tcg_gen_mov_i64(dest, result);
 704
 705         tcg_temp_free_i64(tmp);
 706         tcg_temp_free_i64(vf_64);
 707         tcg_temp_free_i64(cf_64);
 708         tcg_temp_free_i64(result);
 709     } else {
 710         TCGv_i32 t0_32, t1_32, tmp;
 711         t0_32 = tcg_temp_new_i32();
 712         t1_32 = tcg_temp_new_i32();
 713         tmp = tcg_const_i32(0);
 714
 715         tcg_gen_extrl_i64_i32(t0_32, t0);
 716         tcg_gen_extrl_i64_i32(t1_32, t1);
 717         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 718         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 719
 720         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 721         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 722         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 723         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 724         tcg_gen_extu_i32_i64(dest, cpu_NF);
 725
 726         tcg_temp_free_i32(tmp);
 727         tcg_temp_free_i32(t1_32);
 728         tcg_temp_free_i32(t0_32);
 729     }
 730 }
 731
 732 /*
 733  * Load/Store generators
 734  */
 735
 736 /*
 737  * Store from GPR register to memory.
 738  */
 739 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 740                              TCGv_i64 tcg_addr, int size, int memidx,
 741                              bool iss_valid,
 742                              unsigned int iss_srt,
 743                              bool iss_sf, bool iss_ar)
 744 {
 745     g_assert(size <= 3);
 746     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 747
 748     if (iss_valid) {
 749         uint32_t syn;
 750
 751         syn = syn_data_abort_with_iss(0,
 752                                       size,
 753                                       false,
 754                                       iss_srt,
 755                                       iss_sf,
 756                                       iss_ar,
 757                                       0, 0, 0, 0, 0, false);
 758         disas_set_insn_syndrome(s, syn);
 759     }
 760 }
 761
 762 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 763                       TCGv_i64 tcg_addr, int size,
 764                       bool iss_valid,
 765                       unsigned int iss_srt,
 766                       bool iss_sf, bool iss_ar)
 767 {
 768     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
 769                      iss_valid, iss_srt, iss_sf, iss_ar);
 770 }
 771
 772 /*
 773  * Load from memory to GPR register
 774  */
 775 static void do_gpr_ld_memidx(DisasContext *s,
 776                              TCGv_i64 dest, TCGv_i64 tcg_addr,
 777                              int size, bool is_signed,
 778                              bool extend, int memidx,
 779                              bool iss_valid, unsigned int iss_srt,
 780                              bool iss_sf, bool iss_ar)
 781 {
 782     TCGMemOp memop = s->be_data + size;
 783
 784     g_assert(size <= 3);
 785
 786     if (is_signed) {
 787         memop += MO_SIGN;
 788     }
 789
 790     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 791
 792     if (extend && is_signed) {
 793         g_assert(size < 3);
 794         tcg_gen_ext32u_i64(dest, dest);
 795     }
 796
 797     if (iss_valid) {
 798         uint32_t syn;
 799
 800         syn = syn_data_abort_with_iss(0,
 801                                       size,
 802                                       is_signed,
 803                                       iss_srt,
 804                                       iss_sf,
 805                                       iss_ar,
 806                                       0, 0, 0, 0, 0, false);
 807         disas_set_insn_syndrome(s, syn);
 808     }
 809 }
 810
 811 static void do_gpr_ld(DisasContext *s,
 812                       TCGv_i64 dest, TCGv_i64 tcg_addr,
 813                       int size, bool is_signed, bool extend,
 814                       bool iss_valid, unsigned int iss_srt,
 815                       bool iss_sf, bool iss_ar)
 816 {
 817     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 818                      get_mem_index(s),
 819                      iss_valid, iss_srt, iss_sf, iss_ar);
 820 }
 821
 822 /*
 823  * Store from FP register to memory
 824  */
 825 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 826 {
 827     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 828     TCGv_i64 tmp = tcg_temp_new_i64();
 829     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 830     if (size < 4) {
 831         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
 832                             s->be_data + size);
 833     } else {
 834         bool be = s->be_data == MO_BE;
 835         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 836
 837         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 838         tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 839                             s->be_data | MO_Q);
 840         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 841         tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 842                             s->be_data | MO_Q);
 843         tcg_temp_free_i64(tcg_hiaddr);
 844     }
 845
 846     tcg_temp_free_i64(tmp);
 847 }
 848
 849 /*
 850  * Load from memory to FP register
 851  */
 852 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 853 {
 854     /* This always zero-extends and writes to a full 128 bit wide vector */
 855     TCGv_i64 tmplo = tcg_temp_new_i64();
 856     TCGv_i64 tmphi;
 857
 858     if (size < 4) {
 859         TCGMemOp memop = s->be_data + size;
 860         tmphi = tcg_const_i64(0);
 861         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 862     } else {
 863         bool be = s->be_data == MO_BE;
 864         TCGv_i64 tcg_hiaddr;
 865
 866         tmphi = tcg_temp_new_i64();
 867         tcg_hiaddr = tcg_temp_new_i64();
 868
 869         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 870         tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 871                             s->be_data | MO_Q);
 872         tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 873                             s->be_data | MO_Q);
 874         tcg_temp_free_i64(tcg_hiaddr);
 875     }
 876
 877     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
 878     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
 879
 880     tcg_temp_free_i64(tmplo);
 881     tcg_temp_free_i64(tmphi);
 882 }
 883
 884 /*
 885  * Vector load/store helpers.
 886  *
 887  * The principal difference between this and a FP load is that we don't
 888  * zero extend as we are filling a partial chunk of the vector register.
 889  * These functions don't support 128 bit loads/stores, which would be
 890  * normal load/store operations.
 891  *
 892  * The _i32 versions are useful when operating on 32 bit quantities
 893  * (eg for floating point single or using Neon helper functions).
 894  */
 895
 896 /* Get value of an element within a vector register */
 897 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
 898                              int element, TCGMemOp memop)
 899 {
 900     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 901     switch (memop) {
 902     case MO_8:
 903         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
 904         break;
 905     case MO_16:
 906         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
 907         break;
 908     case MO_32:
 909         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
 910         break;
 911     case MO_8|MO_SIGN:
 912         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
 913         break;
 914     case MO_16|MO_SIGN:
 915         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
 916         break;
 917     case MO_32|MO_SIGN:
 918         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
 919         break;
 920     case MO_64:
 921     case MO_64|MO_SIGN:
 922         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
 923         break;
 924     default:
 925         g_assert_not_reached();
 926     }
 927 }
 928
 929 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
 930                                  int element, TCGMemOp memop)
 931 {
 932     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
 933     switch (memop) {
 934     case MO_8:
 935         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
 936         break;
 937     case MO_16:
 938         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
 939         break;
 940     case MO_8|MO_SIGN:
 941         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
 942         break;
 943     case MO_16|MO_SIGN:
 944         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
 945         break;
 946     case MO_32:
 947     case MO_32|MO_SIGN:
 948         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
 949         break;
 950     default:
 951         g_assert_not_reached();
 952     }
 953 }
 954
 955 /* Set value of an element within a vector register */
 956 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
 957                               int element, TCGMemOp memop)
 958 {
 959     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 960     switch (memop) {
 961     case MO_8:
 962         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
 963         break;
 964     case MO_16:
 965         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
 966         break;
 967     case MO_32:
 968         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
 969         break;
 970     case MO_64:
 971         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
 972         break;
 973     default:
 974         g_assert_not_reached();
 975     }
 976 }
 977
 978 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
 979                                   int destidx, int element, TCGMemOp memop)
 980 {
 981     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
 982     switch (memop) {
 983     case MO_8:
 984         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
 985         break;
 986     case MO_16:
 987         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
 988         break;
 989     case MO_32:
 990         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
 991         break;
 992     default:
 993         g_assert_not_reached();
 994     }
 995 }
 996
 997 /* Clear the high 64 bits of a 128 bit vector (in general non-quad
 998  * vector ops all need to do this).
 999  */
1000 static void clear_vec_high(DisasContext *s, int rd)
1001 {
1002     TCGv_i64 tcg_zero = tcg_const_i64(0);
1003
1004     write_vec_element(s, tcg_zero, rd, 1, MO_64);
1005     tcg_temp_free_i64(tcg_zero);
1006 }
1007
1008 /* Store from vector register to memory */
1009 static void do_vec_st(DisasContext *s, int srcidx, int element,
1010                       TCGv_i64 tcg_addr, int size)
1011 {
1012     TCGMemOp memop = s->be_data + size;
1013     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1014
1015     read_vec_element(s, tcg_tmp, srcidx, element, size);
1016     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1017
1018     tcg_temp_free_i64(tcg_tmp);
1019 }
1020
1021 /* Load from memory to vector register */
1022 static void do_vec_ld(DisasContext *s, int destidx, int element,
1023                       TCGv_i64 tcg_addr, int size)
1024 {
1025     TCGMemOp memop = s->be_data + size;
1026     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1027
1028     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1029     write_vec_element(s, tcg_tmp, destidx, element, size);
1030
1031     tcg_temp_free_i64(tcg_tmp);
1032 }
1033
1034 /* Check that FP/Neon access is enabled. If it is, return
1035  * true. If not, emit code to generate an appropriate exception,
1036  * and return false; the caller should not emit any code for
1037  * the instruction. Note that this check must happen after all
1038  * unallocated-encoding checks (otherwise the syndrome information
1039  * for the resulting exception will be incorrect).
1040  */
1041 static inline bool fp_access_check(DisasContext *s)
1042 {
1043     assert(!s->fp_access_checked);
1044     s->fp_access_checked = true;
1045
1046     if (!s->fp_excp_el) {
1047         return true;
1048     }
1049
1050     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1051                        s->fp_excp_el);
1052     return false;
1053 }
1054
1055 /*
1056  * This utility function is for doing register extension with an
1057  * optional shift. You will likely want to pass a temporary for the
1058  * destination register. See DecodeRegExtend() in the ARM ARM.
1059  */
1060 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1061                               int option, unsigned int shift)
1062 {
1063     int extsize = extract32(option, 0, 2);
1064     bool is_signed = extract32(option, 2, 1);
1065
1066     if (is_signed) {
1067         switch (extsize) {
1068         case 0:
1069             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1070             break;
1071         case 1:
1072             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1073             break;
1074         case 2:
1075             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1076             break;
1077         case 3:
1078             tcg_gen_mov_i64(tcg_out, tcg_in);
1079             break;
1080         }
1081     } else {
1082         switch (extsize) {
1083         case 0:
1084             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1085             break;
1086         case 1:
1087             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1088             break;
1089         case 2:
1090             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1091             break;
1092         case 3:
1093             tcg_gen_mov_i64(tcg_out, tcg_in);
1094             break;
1095         }
1096     }
1097
1098     if (shift) {
1099         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1100     }
1101 }
1102
/* Hook for SP alignment checking; currently emits no code at all. */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1115
1116 /*
1117  * This provides a simple table based table lookup decoder. It is
1118  * intended to be used when the relevant bits for decode are too
1119  * awkwardly placed and switch/if based logic would be confusing and
1120  * deeply nested. Since it's a linear search through the table, tables
1121  * should be kept small.
1122  *
1123  * It returns the first handler where insn & mask == pattern, or
1124  * NULL if there is no match.
1125  * The table is terminated by an empty mask (i.e. 0)
1126  */
1127 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1128                                                uint32_t insn)
1129 {
1130     const AArch64DecodeTable *tptr = table;
1131
1132     while (tptr->mask) {
1133         if ((insn & tptr->mask) == tptr->pattern) {
1134             return tptr->disas_fn;
1135         }
1136         tptr++;
1137     }
1138     return NULL;
1139 }
1140
1141 /*
1142  * the instruction disassembly implemented here matches
1143  * the instruction encoding classifications in chapter 3 (C3)
1144  * of the ARM Architecture Reference Manual (DDI0487A_a)
1145  */
1146
1147 /* C3.2.7 Unconditional branch (immediate)
1148  *   31  30       26 25                                  0
1149  * +----+-----------+-------------------------------------+
1150  * | op | 0 0 1 0 1 |                 imm26               |
1151  * +----+-----------+-------------------------------------+
1152  */
1153 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1154 {
1155     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1156
1157     if (insn & (1U << 31)) {
1158         /* C5.6.26 BL Branch with link */
1159         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1160     }
1161
1162     /* C5.6.20 B Branch / C5.6.26 BL Branch with link */
1163     gen_goto_tb(s, 0, addr);
1164 }
1165
1166 /* C3.2.1 Compare & branch (immediate)
1167  *   31  30         25  24  23                  5 4      0
1168  * +----+-------------+----+---------------------+--------+
1169  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1170  * +----+-------------+----+---------------------+--------+
1171  */
1172 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1173 {
1174     unsigned int sf, op, rt;
1175     uint64_t addr;
1176     TCGLabel *label_match;
1177     TCGv_i64 tcg_cmp;
1178
1179     sf = extract32(insn, 31, 1);
1180     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1181     rt = extract32(insn, 0, 5);
1182     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1183
1184     tcg_cmp = read_cpu_reg(s, rt, sf);
1185     label_match = gen_new_label();
1186
1187     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1188                         tcg_cmp, 0, label_match);
1189
1190     gen_goto_tb(s, 0, s->pc);
1191     gen_set_label(label_match);
1192     gen_goto_tb(s, 1, addr);
1193 }
1194
1195 /* C3.2.5 Test & branch (immediate)
1196  *   31  30         25  24  23   19 18          5 4    0
1197  * +----+-------------+----+-------+-------------+------+
1198  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1199  * +----+-------------+----+-------+-------------+------+
1200  */
1201 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1202 {
1203     unsigned int bit_pos, op, rt;
1204     uint64_t addr;
1205     TCGLabel *label_match;
1206     TCGv_i64 tcg_cmp;
1207
1208     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1209     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1210     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1211     rt = extract32(insn, 0, 5);
1212
1213     tcg_cmp = tcg_temp_new_i64();
1214     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1215     label_match = gen_new_label();
1216     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1217                         tcg_cmp, 0, label_match);
1218     tcg_temp_free_i64(tcg_cmp);
1219     gen_goto_tb(s, 0, s->pc);
1220     gen_set_label(label_match);
1221     gen_goto_tb(s, 1, addr);
1222 }
1223
1224 /* C3.2.2 / C5.6.19 Conditional branch (immediate)
1225  *  31           25  24  23                  5   4  3    0
1226  * +---------------+----+---------------------+----+------+
1227  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1228  * +---------------+----+---------------------+----+------+
1229  */
1230 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1231 {
1232     unsigned int cond;
1233     uint64_t addr;
1234
1235     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1236         unallocated_encoding(s);
1237         return;
1238     }
1239     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1240     cond = extract32(insn, 0, 4);
1241
1242     if (cond < 0x0e) {
1243         /* genuinely conditional branches */
1244         TCGLabel *label_match = gen_new_label();
1245         arm_gen_test_cc(cond, label_match);
1246         gen_goto_tb(s, 0, s->pc);
1247         gen_set_label(label_match);
1248         gen_goto_tb(s, 1, addr);
1249     } else {
1250         /* 0xe and 0xf are both "always" conditions */
1251         gen_goto_tb(s, 0, addr);
1252     }
1253 }
1254
1255 /* C5.6.68 HINT */
1256 static void handle_hint(DisasContext *s, uint32_t insn,
1257                         unsigned int op1, unsigned int op2, unsigned int crm)
1258 {
1259     unsigned int selector = crm << 3 | op2;
1260
1261     if (op1 != 3) {
1262         unallocated_encoding(s);
1263         return;
1264     }
1265
1266     switch (selector) {
1267     case 0: /* NOP */
1268         return;
1269     case 3: /* WFI */
1270         s->is_jmp = DISAS_WFI;
1271         return;
1272     case 1: /* YIELD */
1273         s->is_jmp = DISAS_YIELD;
1274         return;
1275     case 2: /* WFE */
1276         s->is_jmp = DISAS_WFE;
1277         return;
1278     case 4: /* SEV */
1279     case 5: /* SEVL */
1280         /* we treat all as NOP at least for now */
1281         return;
1282     default:
1283         /* default specified as NOP equivalent */
1284         return;
1285     }
1286 }
1287
/* CLREX: emit code setting cpu_exclusive_addr to -1.
 * NOTE(review): presumably -1 never equals a real exclusive address, so
 * a following store-exclusive fails its address comparison -- confirm
 * against gen_store_exclusive().  The insn argument is not used.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1292
1293 /* CLREX, DSB, DMB, ISB */
1294 static void handle_sync(DisasContext *s, uint32_t insn,
1295                         unsigned int op1, unsigned int op2, unsigned int crm)
1296 {
1297     if (op1 != 3) {
1298         unallocated_encoding(s);
1299         return;
1300     }
1301
1302     switch (op2) {
1303     case 2: /* CLREX */
1304         gen_clrex(s, insn);
1305         return;
1306     case 4: /* DSB */
1307     case 5: /* DMB */
1308         /* We don't emulate caches so barriers are no-ops */
1309         return;
1310     case 6: /* ISB */
1311         /* We need to break the TB after this insn to execute
1312          * a self-modified code correctly and also to take
1313          * any pending interrupts immediately.
1314          */
1315         s->is_jmp = DISAS_UPDATE;
1316         return;
1317     default:
1318         unallocated_encoding(s);
1319         return;
1320     }
1321 }
1322
1323 /* C5.6.130 MSR (immediate) - move immediate to processor state field */
1324 static void handle_msr_i(DisasContext *s, uint32_t insn,
1325                          unsigned int op1, unsigned int op2, unsigned int crm)
1326 {
1327     int op = op1 << 3 | op2;
1328     switch (op) {
1329     case 0x05: /* SPSel */
1330         if (s->current_el == 0) {
1331             unallocated_encoding(s);
1332             return;
1333         }
1334         /* fall through */
1335     case 0x1e: /* DAIFSet */
1336     case 0x1f: /* DAIFClear */
1337     {
1338         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1339         TCGv_i32 tcg_op = tcg_const_i32(op);
1340         gen_a64_set_pc_im(s->pc - 4);
1341         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1342         tcg_temp_free_i32(tcg_imm);
1343         tcg_temp_free_i32(tcg_op);
1344         s->is_jmp = DISAS_UPDATE;
1345         break;
1346     }
1347     default:
1348         unallocated_encoding(s);
1349         return;
1350     }
1351 }
1352
1353 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1354 {
1355     TCGv_i32 tmp = tcg_temp_new_i32();
1356     TCGv_i32 nzcv = tcg_temp_new_i32();
1357
1358     /* build bit 31, N */
1359     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1360     /* build bit 30, Z */
1361     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1362     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1363     /* build bit 29, C */
1364     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1365     /* build bit 28, V */
1366     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1367     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1368     /* generate result */
1369     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1370
1371     tcg_temp_free_i32(nzcv);
1372     tcg_temp_free_i32(tmp);
1373 }
1374
1375 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1376
1377 {
1378     TCGv_i32 nzcv = tcg_temp_new_i32();
1379
1380     /* take NZCV from R[t] */
1381     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1382
1383     /* bit 31, N */
1384     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1385     /* bit 30, Z */
1386     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1387     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1388     /* bit 29, C */
1389     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1390     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1391     /* bit 28, V */
1392     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1393     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1394     tcg_temp_free_i32(nzcv);
1395 }
1396
/* C5.6.129 MRS - move from system register
 * C5.6.131 MSR (register) - move to system register
 * C5.6.204 SYS
 * C5.6.205 SYSL
 * These are all essentially the same insn in 'read' and 'write'
 * versions, with varying op0 fields.
 *
 * isread is true for the 'read' versions.  op0, op1, op2, crn and crm
 * identify the register, which is looked up in s->cp_regs; rt is the
 * number of the general purpose register being transferred.
 * Unknown registers and accesses rejected by cp_access_ok() are treated
 * as unallocated encodings.
 */
static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
                       unsigned int op0, unsigned int op1, unsigned int op2,
                       unsigned int crn, unsigned int crm, unsigned int rt)
{
    const ARMCPRegInfo *ri;
    TCGv_i64 tcg_rt;

    ri = get_arm_cp_reginfo(s->cp_regs,
                            ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
                                               crn, crm, op0, op1, op2));

    if (!ri) {
        /* Unknown register; this might be a guest error or a QEMU
         * unimplemented feature.
         */
        qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
                      "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
                      isread ? "read" : "write", op0, op1, crn, crm, op2);
        unallocated_encoding(s);
        return;
    }

    /* Check access permissions */
    if (!cp_access_ok(s->current_el, ri, isread)) {
        unallocated_encoding(s);
        return;
    }

    if (ri->accessfn) {
        /* Emit code to perform further access permissions checks at
         * runtime; this may result in an exception.
         */
        TCGv_ptr tmpptr;
        TCGv_i32 tcg_syn, tcg_isread;
        uint32_t syndrome;

        /* The PC is set to this insn before the helper is called */
        gen_a64_set_pc_im(s->pc - 4);
        tmpptr = tcg_const_ptr(ri);
        syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
        tcg_syn = tcg_const_i32(syndrome);
        tcg_isread = tcg_const_i32(isread);
        gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
        tcg_temp_free_ptr(tmpptr);
        tcg_temp_free_i32(tcg_syn);
        tcg_temp_free_i32(tcg_isread);
    }

    /* Handle special cases first */
    /* The mask removes every flag bit except ARM_CP_SPECIAL from the
     * type before it is compared with the special-case constants.
     */
    switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
    case ARM_CP_NOP:
        return;
    case ARM_CP_NZCV:
        tcg_rt = cpu_reg(s, rt);
        if (isread) {
            gen_get_nzcv(tcg_rt);
        } else {
            gen_set_nzcv(tcg_rt);
        }
        return;
    case ARM_CP_CURRENTEL:
        /* Reads as current EL value from pstate, which is
         * guaranteed to be constant by the tb flags.
         */
        tcg_rt = cpu_reg(s, rt);
        tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
        return;
    case ARM_CP_DC_ZVA:
        /* Writes clear the aligned block of memory which rt points into. */
        tcg_rt = cpu_reg(s, rt);
        gen_helper_dc_zva(cpu_env, tcg_rt);
        return;
    default:
        break;
    }

    /* With icount in use, an access to an I/O register is bracketed by
     * gen_io_start() here and gen_io_end() below.
     */
    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        gen_io_start();
    }

    tcg_rt = cpu_reg(s, rt);

    if (isread) {
        /* In order of preference: constant value, read helper,
         * direct load from the CPU state field.
         */
        if (ri->type & ARM_CP_CONST) {
            tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
        } else if (ri->readfn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    } else {
        /* In order of preference: ignore the write, write helper,
         * direct store to the CPU state field.
         */
        if (ri->type & ARM_CP_CONST) {
            /* If not forbidden by access permissions, treat as WI */
            return;
        } else if (ri->writefn) {
            TCGv_ptr tmpptr;
            tmpptr = tcg_const_ptr(ri);
            gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
            tcg_temp_free_ptr(tmpptr);
        } else {
            tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
        }
    }

    if ((s->tb->cflags & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
        /* I/O operations must end the TB here (whether read or write) */
        gen_io_end();
        s->is_jmp = DISAS_UPDATE;
    } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
        /* We default to ending the TB on a coprocessor register write,
         * but allow this to be suppressed by the register definition
         * (usually only necessary to work around guest bugs).
         */
        s->is_jmp = DISAS_UPDATE;
    }
}
1522
1523 /* C3.2.4 System
1524  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1525  * +---------------------+---+-----+-----+-------+-------+-----+------+
1526  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1527  * +---------------------+---+-----+-----+-------+-------+-----+------+
1528  */
1529 static void disas_system(DisasContext *s, uint32_t insn)
1530 {
1531     unsigned int l, op0, op1, crn, crm, op2, rt;
1532     l = extract32(insn, 21, 1);
1533     op0 = extract32(insn, 19, 2);
1534     op1 = extract32(insn, 16, 3);
1535     crn = extract32(insn, 12, 4);
1536     crm = extract32(insn, 8, 4);
1537     op2 = extract32(insn, 5, 3);
1538     rt = extract32(insn, 0, 5);
1539
1540     if (op0 == 0) {
1541         if (l || rt != 31) {
1542             unallocated_encoding(s);
1543             return;
1544         }
1545         switch (crn) {
1546         case 2: /* C5.6.68 HINT */
1547             handle_hint(s, insn, op1, op2, crm);
1548             break;
1549         case 3: /* CLREX, DSB, DMB, ISB */
1550             handle_sync(s, insn, op1, op2, crm);
1551             break;
1552         case 4: /* C5.6.130 MSR (immediate) */
1553             handle_msr_i(s, insn, op1, op2, crm);
1554             break;
1555         default:
1556             unallocated_encoding(s);
1557             break;
1558         }
1559         return;
1560     }
1561     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1562 }
1563
1564 /* C3.2.3 Exception generation
1565  *
1566  *  31             24 23 21 20                     5 4   2 1  0
1567  * +-----------------+-----+------------------------+-----+----+
1568  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1569  * +-----------------------+------------------------+----------+
1570  */
1571 static void disas_exc(DisasContext *s, uint32_t insn)
1572 {
1573     int opc = extract32(insn, 21, 3);
1574     int op2_ll = extract32(insn, 0, 5);
1575     int imm16 = extract32(insn, 5, 16);
1576     TCGv_i32 tmp;
1577
1578     switch (opc) {
1579     case 0:
1580         /* For SVC, HVC and SMC we advance the single-step state
1581          * machine before taking the exception. This is architecturally
1582          * mandated, to ensure that single-stepping a system call
1583          * instruction works properly.
1584          */
1585         switch (op2_ll) {
1586         case 1:
1587             gen_ss_advance(s);
1588             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1589                                default_exception_el(s));
1590             break;
1591         case 2:
1592             if (s->current_el == 0) {
1593                 unallocated_encoding(s);
1594                 break;
1595             }
1596             /* The pre HVC helper handles cases when HVC gets trapped
1597              * as an undefined insn by runtime configuration.
1598              */
1599             gen_a64_set_pc_im(s->pc - 4);
1600             gen_helper_pre_hvc(cpu_env);
1601             gen_ss_advance(s);
1602             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1603             break;
1604         case 3:
1605             if (s->current_el == 0) {
1606                 unallocated_encoding(s);
1607                 break;
1608             }
1609             gen_a64_set_pc_im(s->pc - 4);
1610             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1611             gen_helper_pre_smc(cpu_env, tmp);
1612             tcg_temp_free_i32(tmp);
1613             gen_ss_advance(s);
1614             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1615             break;
1616         default:
1617             unallocated_encoding(s);
1618             break;
1619         }
1620         break;
1621     case 1:
1622         if (op2_ll != 0) {
1623             unallocated_encoding(s);
1624             break;
1625         }
1626         /* BRK */
1627         gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1628                            default_exception_el(s));
1629         break;
1630     case 2:
1631         if (op2_ll != 0) {
1632             unallocated_encoding(s);
1633             break;
1634         }
1635         /* HLT. This has two purposes.
1636          * Architecturally, it is an external halting debug instruction.
1637          * Since QEMU doesn't implement external debug, we treat this as
1638          * it is required for halting debug disabled: it will UNDEF.
1639          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1640          */
1641         if (semihosting_enabled() && imm16 == 0xf000) {
1642 #ifndef CONFIG_USER_ONLY
1643             /* In system mode, don't allow userspace access to semihosting,
1644              * to provide some semblance of security (and for consistency
1645              * with our 32-bit semihosting).
1646              */
1647             if (s->current_el == 0) {
1648                 unsupported_encoding(s, insn);
1649                 break;
1650             }
1651 #endif
1652             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1653         } else {
1654             unsupported_encoding(s, insn);
1655         }
1656         break;
1657     case 5:
1658         if (op2_ll < 1 || op2_ll > 3) {
1659             unallocated_encoding(s);
1660             break;
1661         }
1662         /* DCPS1, DCPS2, DCPS3 */
1663         unsupported_encoding(s, insn);
1664         break;
1665     default:
1666         unallocated_encoding(s);
1667         break;
1668     }
1669 }
1670
1671 /* C3.2.7 Unconditional branch (register)
1672  *  31           25 24   21 20   16 15   10 9    5 4     0
1673  * +---------------+-------+-------+-------+------+-------+
1674  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1675  * +---------------+-------+-------+-------+------+-------+
1676  */
1677 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1678 {
1679     unsigned int opc, op2, op3, rn, op4;
1680
1681     opc = extract32(insn, 21, 4);
1682     op2 = extract32(insn, 16, 5);
1683     op3 = extract32(insn, 10, 6);
1684     rn = extract32(insn, 5, 5);
1685     op4 = extract32(insn, 0, 5);
1686
1687     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1688         unallocated_encoding(s);
1689         return;
1690     }
1691
1692     switch (opc) {
1693     case 0: /* BR */
1694     case 2: /* RET */
1695         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1696         break;
1697     case 1: /* BLR */
1698         tcg_gen_mov_i64(cpu_pc, cpu_reg(s, rn));
1699         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1700         break;
1701     case 4: /* ERET */
1702         if (s->current_el == 0) {
1703             unallocated_encoding(s);
1704             return;
1705         }
1706         gen_helper_exception_return(cpu_env);
1707         s->is_jmp = DISAS_JUMP;
1708         return;
1709     case 5: /* DRPS */
1710         if (rn != 0x1f) {
1711             unallocated_encoding(s);
1712         } else {
1713             unsupported_encoding(s, insn);
1714         }
1715         return;
1716     default:
1717         unallocated_encoding(s);
1718         return;
1719     }
1720
1721     s->is_jmp = DISAS_JUMP;
1722 }
1723
1724 /* C3.2 Branches, exception generating and system instructions */
1725 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1726 {
1727     switch (extract32(insn, 25, 7)) {
1728     case 0x0a: case 0x0b:
1729     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1730         disas_uncond_b_imm(s, insn);
1731         break;
1732     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1733         disas_comp_b_imm(s, insn);
1734         break;
1735     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1736         disas_test_b_imm(s, insn);
1737         break;
1738     case 0x2a: /* Conditional branch (immediate) */
1739         disas_cond_b_imm(s, insn);
1740         break;
1741     case 0x6a: /* Exception generation / System */
1742         if (insn & (1 << 24)) {
1743             disas_system(s, insn);
1744         } else {
1745             disas_exc(s, insn);
1746         }
1747         break;
1748     case 0x6b: /* Unconditional branch (register) */
1749         disas_uncond_b_reg(s, insn);
1750         break;
1751     default:
1752         unallocated_encoding(s);
1753         break;
1754     }
1755 }
1756
1757 /*
1758  * Load/Store exclusive instructions are implemented by remembering
1759  * the value/address loaded, and seeing if these are the same
1760  * when the store is performed. This is not actually the architecturally
1761  * mandated semantics, but it works for typical guest code sequences
1762  * and avoids having to monitor regular stores.
1763  *
1764  * In system emulation mode only one CPU will be running at once, so
1765  * this sequence is effectively atomic.  In user emulation mode we
1766  * throw an exception and handle the atomic operation elsewhere.
1767  */
1768 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1769                                TCGv_i64 addr, int size, bool is_pair)
1770 {
1771     TCGv_i64 tmp = tcg_temp_new_i64();
1772     TCGMemOp memop = s->be_data + size;
1773
1774     g_assert(size <= 3);
1775     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), memop);
1776
1777     if (is_pair) {
1778         TCGv_i64 addr2 = tcg_temp_new_i64();
1779         TCGv_i64 hitmp = tcg_temp_new_i64();
1780
1781         g_assert(size >= 2);
1782         tcg_gen_addi_i64(addr2, addr, 1 << size);
1783         tcg_gen_qemu_ld_i64(hitmp, addr2, get_mem_index(s), memop);
1784         tcg_temp_free_i64(addr2);
1785         tcg_gen_mov_i64(cpu_exclusive_high, hitmp);
1786         tcg_gen_mov_i64(cpu_reg(s, rt2), hitmp);
1787         tcg_temp_free_i64(hitmp);
1788     }
1789
1790     tcg_gen_mov_i64(cpu_exclusive_val, tmp);
1791     tcg_gen_mov_i64(cpu_reg(s, rt), tmp);
1792
1793     tcg_temp_free_i64(tmp);
1794     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1795 }
1796
1797 #ifdef CONFIG_USER_ONLY
1798 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1799                                 TCGv_i64 addr, int size, int is_pair)
1800 {
1801     tcg_gen_mov_i64(cpu_exclusive_test, addr);
1802     tcg_gen_movi_i32(cpu_exclusive_info,
1803                      size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14));
1804     gen_exception_internal_insn(s, 4, EXCP_STREX);
1805 }
1806 #else
1807 static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
1808                                 TCGv_i64 inaddr, int size, int is_pair)
1809 {
1810     /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
1811      *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
1812      *     [addr] = {Rt};
1813      *     if (is_pair) {
1814      *         [addr + datasize] = {Rt2};
1815      *     }
1816      *     {Rd} = 0;
1817      * } else {
1818      *     {Rd} = 1;
1819      * }
1820      * env->exclusive_addr = -1;
1821      */
1822     TCGLabel *fail_label = gen_new_label();
1823     TCGLabel *done_label = gen_new_label();
1824     TCGv_i64 addr = tcg_temp_local_new_i64();
1825     TCGv_i64 tmp;
1826
1827     /* Copy input into a local temp so it is not trashed when the
1828      * basic block ends at the branch insn.
1829      */
1830     tcg_gen_mov_i64(addr, inaddr);
1831     tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);
1832
1833     tmp = tcg_temp_new_i64();
1834     tcg_gen_qemu_ld_i64(tmp, addr, get_mem_index(s), s->be_data + size);
1835     tcg_gen_brcond_i64(TCG_COND_NE, tmp, cpu_exclusive_val, fail_label);
1836     tcg_temp_free_i64(tmp);
1837
1838     if (is_pair) {
1839         TCGv_i64 addrhi = tcg_temp_new_i64();
1840         TCGv_i64 tmphi = tcg_temp_new_i64();
1841
1842         tcg_gen_addi_i64(addrhi, addr, 1 << size);
1843         tcg_gen_qemu_ld_i64(tmphi, addrhi, get_mem_index(s),
1844                             s->be_data + size);
1845         tcg_gen_brcond_i64(TCG_COND_NE, tmphi, cpu_exclusive_high, fail_label);
1846
1847         tcg_temp_free_i64(tmphi);
1848         tcg_temp_free_i64(addrhi);
1849     }
1850
1851     /* We seem to still have the exclusive monitor, so do the store */
1852     tcg_gen_qemu_st_i64(cpu_reg(s, rt), addr, get_mem_index(s),
1853                         s->be_data + size);
1854     if (is_pair) {
1855         TCGv_i64 addrhi = tcg_temp_new_i64();
1856
1857         tcg_gen_addi_i64(addrhi, addr, 1 << size);
1858         tcg_gen_qemu_st_i64(cpu_reg(s, rt2), addrhi,
1859                             get_mem_index(s), s->be_data + size);
1860         tcg_temp_free_i64(addrhi);
1861     }
1862
1863     tcg_temp_free_i64(addr);
1864
1865     tcg_gen_movi_i64(cpu_reg(s, rd), 0);
1866     tcg_gen_br(done_label);
1867     gen_set_label(fail_label);
1868     tcg_gen_movi_i64(cpu_reg(s, rd), 1);
1869     gen_set_label(done_label);
1870     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1871
1872 }
1873 #endif
1874
1875 /* Update the Sixty-Four bit (SF) registersize. This logic is derived
1876  * from the ARMv8 specs for LDR (Shared decode for all encodings).
1877  */
1878 static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
1879 {
1880     int opc0 = extract32(opc, 0, 1);
1881     int regsize;
1882
1883     if (is_signed) {
1884         regsize = opc0 ? 32 : 64;
1885     } else {
1886         regsize = size == 3 ? 64 : 32;
1887     }
1888     return regsize == 64;
1889 }
1890
1891 /* C3.3.6 Load/store exclusive
1892  *
1893  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
1894  * +-----+-------------+----+---+----+------+----+-------+------+------+
1895  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
1896  * +-----+-------------+----+---+----+------+----+-------+------+------+
1897  *
1898  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
1899  *   L: 0 -> store, 1 -> load
1900  *  o2: 0 -> exclusive, 1 -> not
1901  *  o1: 0 -> single register, 1 -> register pair
1902  *  o0: 1 -> load-acquire/store-release, 0 -> not
1903  */
1904 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
1905 {
1906     int rt = extract32(insn, 0, 5);
1907     int rn = extract32(insn, 5, 5);
1908     int rt2 = extract32(insn, 10, 5);
1909     int is_lasr = extract32(insn, 15, 1);
1910     int rs = extract32(insn, 16, 5);
1911     int is_pair = extract32(insn, 21, 1);
1912     int is_store = !extract32(insn, 22, 1);
1913     int is_excl = !extract32(insn, 23, 1);
1914     int size = extract32(insn, 30, 2);
1915     TCGv_i64 tcg_addr;
1916
1917     if ((!is_excl && !is_pair && !is_lasr) ||
1918         (!is_excl && is_pair) ||
1919         (is_pair && size < 2)) {
1920         unallocated_encoding(s);
1921         return;
1922     }
1923
1924     if (rn == 31) {
1925         gen_check_sp_alignment(s);
1926     }
1927     tcg_addr = read_cpu_reg_sp(s, rn, 1);
1928
1929     /* Note that since TCG is single threaded load-acquire/store-release
1930      * semantics require no extra if (is_lasr) { ... } handling.
1931      */
1932
1933     if (is_excl) {
1934         if (!is_store) {
1935             s->is_ldex = true;
1936             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
1937         } else {
1938             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
1939         }
1940     } else {
1941         TCGv_i64 tcg_rt = cpu_reg(s, rt);
1942         bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
1943
1944         /* Generate ISS for non-exclusive accesses including LASR.  */
1945         if (is_store) {
1946             do_gpr_st(s, tcg_rt, tcg_addr, size,
1947                       true, rt, iss_sf, is_lasr);
1948         } else {
1949             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
1950                       true, rt, iss_sf, is_lasr);
1951         }
1952     }
1953 }
1954
1955 /*
1956  * C3.3.5 Load register (literal)
1957  *
1958  *  31 30 29   27  26 25 24 23                5 4     0
1959  * +-----+-------+---+-----+-------------------+-------+
1960  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
1961  * +-----+-------+---+-----+-------------------+-------+
1962  *
1963  * V: 1 -> vector (simd/fp)
1964  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
1965  *                   10-> 32 bit signed, 11 -> prefetch
1966  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
1967  */
1968 static void disas_ld_lit(DisasContext *s, uint32_t insn)
1969 {
1970     int rt = extract32(insn, 0, 5);
1971     int64_t imm = sextract32(insn, 5, 19) << 2;
1972     bool is_vector = extract32(insn, 26, 1);
1973     int opc = extract32(insn, 30, 2);
1974     bool is_signed = false;
1975     int size = 2;
1976     TCGv_i64 tcg_rt, tcg_addr;
1977
1978     if (is_vector) {
1979         if (opc == 3) {
1980             unallocated_encoding(s);
1981             return;
1982         }
1983         size = 2 + opc;
1984         if (!fp_access_check(s)) {
1985             return;
1986         }
1987     } else {
1988         if (opc == 3) {
1989             /* PRFM (literal) : prefetch */
1990             return;
1991         }
1992         size = 2 + extract32(opc, 0, 1);
1993         is_signed = extract32(opc, 1, 1);
1994     }
1995
1996     tcg_rt = cpu_reg(s, rt);
1997
1998     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
1999     if (is_vector) {
2000         do_fp_ld(s, rt, tcg_addr, size);
2001     } else {
2002         /* Only unsigned 32bit loads target 32bit registers.  */
2003         bool iss_sf = opc == 0 ? 32 : 64;
2004
2005         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2006                   true, rt, iss_sf, false);
2007     }
2008     tcg_temp_free_i64(tcg_addr);
2009 }
2010
2011 /*
2012  * C5.6.80 LDNP (Load Pair - non-temporal hint)
2013  * C5.6.81 LDP (Load Pair - non vector)
2014  * C5.6.82 LDPSW (Load Pair Signed Word - non vector)
2015  * C5.6.176 STNP (Store Pair - non-temporal hint)
2016  * C5.6.177 STP (Store Pair - non vector)
2017  * C6.3.165 LDNP (Load Pair of SIMD&FP - non-temporal hint)
2018  * C6.3.165 LDP (Load Pair of SIMD&FP)
2019  * C6.3.284 STNP (Store Pair of SIMD&FP - non-temporal hint)
2020  * C6.3.284 STP (Store Pair of SIMD&FP)
2021  *
2022  *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
2023  * +-----+-------+---+---+-------+---+-----------------------------+
2024  * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
2025  * +-----+-------+---+---+-------+---+-------+-------+------+------+
2026  *
2027  * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
2028  *      LDPSW                    01
2029  *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
2030  *   V: 0 -> GPR, 1 -> Vector
2031  * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
2032  *      10 -> signed offset, 11 -> pre-index
2033  *   L: 0 -> Store 1 -> Load
2034  *
2035  * Rt, Rt2 = GPR or SIMD registers to be stored
2036  * Rn = general purpose register containing address
2037  * imm7 = signed offset (multiple of 4 or 8 depending on size)
2038  */
2039 static void disas_ldst_pair(DisasContext *s, uint32_t insn)
2040 {
2041     int rt = extract32(insn, 0, 5);
2042     int rn = extract32(insn, 5, 5);
2043     int rt2 = extract32(insn, 10, 5);
2044     uint64_t offset = sextract64(insn, 15, 7);
2045     int index = extract32(insn, 23, 2);
2046     bool is_vector = extract32(insn, 26, 1);
2047     bool is_load = extract32(insn, 22, 1);
2048     int opc = extract32(insn, 30, 2);
2049
2050     bool is_signed = false;
2051     bool postindex = false;
2052     bool wback = false;
2053
2054     TCGv_i64 tcg_addr; /* calculated address */
2055     int size;
2056
2057     if (opc == 3) {
2058         unallocated_encoding(s);
2059         return;
2060     }
2061
2062     if (is_vector) {
2063         size = 2 + opc;
2064     } else {
2065         size = 2 + extract32(opc, 1, 1);
2066         is_signed = extract32(opc, 0, 1);
2067         if (!is_load && is_signed) {
2068             unallocated_encoding(s);
2069             return;
2070         }
2071     }
2072
2073     switch (index) {
2074     case 1: /* post-index */
2075         postindex = true;
2076         wback = true;
2077         break;
2078     case 0:
2079         /* signed offset with "non-temporal" hint. Since we don't emulate
2080          * caches we don't care about hints to the cache system about
2081          * data access patterns, and handle this identically to plain
2082          * signed offset.
2083          */
2084         if (is_signed) {
2085             /* There is no non-temporal-hint version of LDPSW */
2086             unallocated_encoding(s);
2087             return;
2088         }
2089         postindex = false;
2090         break;
2091     case 2: /* signed offset, rn not updated */
2092         postindex = false;
2093         break;
2094     case 3: /* pre-index */
2095         postindex = false;
2096         wback = true;
2097         break;
2098     }
2099
2100     if (is_vector && !fp_access_check(s)) {
2101         return;
2102     }
2103
2104     offset <<= size;
2105
2106     if (rn == 31) {
2107         gen_check_sp_alignment(s);
2108     }
2109
2110     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2111
2112     if (!postindex) {
2113         tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2114     }
2115
2116     if (is_vector) {
2117         if (is_load) {
2118             do_fp_ld(s, rt, tcg_addr, size);
2119         } else {
2120             do_fp_st(s, rt, tcg_addr, size);
2121         }
2122     } else {
2123         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2124         if (is_load) {
2125             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2126                       false, 0, false, false);
2127         } else {
2128             do_gpr_st(s, tcg_rt, tcg_addr, size,
2129                       false, 0, false, false);
2130         }
2131     }
2132     tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
2133     if (is_vector) {
2134         if (is_load) {
2135             do_fp_ld(s, rt2, tcg_addr, size);
2136         } else {
2137             do_fp_st(s, rt2, tcg_addr, size);
2138         }
2139     } else {
2140         TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);
2141         if (is_load) {
2142             do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
2143                       false, 0, false, false);
2144         } else {
2145             do_gpr_st(s, tcg_rt2, tcg_addr, size,
2146                       false, 0, false, false);
2147         }
2148     }
2149
2150     if (wback) {
2151         if (postindex) {
2152             tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
2153         } else {
2154             tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
2155         }
2156         tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
2157     }
2158 }
2159
2160 /*
2161  * C3.3.8 Load/store (immediate post-indexed)
2162  * C3.3.9 Load/store (immediate pre-indexed)
2163  * C3.3.12 Load/store (unscaled immediate)
2164  *
2165  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2166  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2167  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2168  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2169  *
2170  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
 *       10 -> unprivileged
2172  * V = 0 -> non-vector
2173  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2174  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2175  */
2176 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2177                                 int opc,
2178                                 int size,
2179                                 int rt,
2180                                 bool is_vector)
2181 {
2182     int rn = extract32(insn, 5, 5);
2183     int imm9 = sextract32(insn, 12, 9);
2184     int idx = extract32(insn, 10, 2);
2185     bool is_signed = false;
2186     bool is_store = false;
2187     bool is_extended = false;
2188     bool is_unpriv = (idx == 2);
2189     bool iss_valid = !is_vector;
2190     bool post_index;
2191     bool writeback;
2192
2193     TCGv_i64 tcg_addr;
2194
2195     if (is_vector) {
2196         size |= (opc & 2) << 1;
2197         if (size > 4 || is_unpriv) {
2198             unallocated_encoding(s);
2199             return;
2200         }
2201         is_store = ((opc & 1) == 0);
2202         if (!fp_access_check(s)) {
2203             return;
2204         }
2205     } else {
2206         if (size == 3 && opc == 2) {
2207             /* PRFM - prefetch */
2208             if (is_unpriv) {
2209                 unallocated_encoding(s);
2210                 return;
2211             }
2212             return;
2213         }
2214         if (opc == 3 && size > 1) {
2215             unallocated_encoding(s);
2216             return;
2217         }
2218         is_store = (opc == 0);
2219         is_signed = extract32(opc, 1, 1);
2220         is_extended = (size < 3) && extract32(opc, 0, 1);
2221     }
2222
2223     switch (idx) {
2224     case 0:
2225     case 2:
2226         post_index = false;
2227         writeback = false;
2228         break;
2229     case 1:
2230         post_index = true;
2231         writeback = true;
2232         break;
2233     case 3:
2234         post_index = false;
2235         writeback = true;
2236         break;
2237     }
2238
2239     if (rn == 31) {
2240         gen_check_sp_alignment(s);
2241     }
2242     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2243
2244     if (!post_index) {
2245         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2246     }
2247
2248     if (is_vector) {
2249         if (is_store) {
2250             do_fp_st(s, rt, tcg_addr, size);
2251         } else {
2252             do_fp_ld(s, rt, tcg_addr, size);
2253         }
2254     } else {
2255         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2256         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2257         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2258
2259         if (is_store) {
2260             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2261                              iss_valid, rt, iss_sf, false);
2262         } else {
2263             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2264                              is_signed, is_extended, memidx,
2265                              iss_valid, rt, iss_sf, false);
2266         }
2267     }
2268
2269     if (writeback) {
2270         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2271         if (post_index) {
2272             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2273         }
2274         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2275     }
2276 }
2277
2278 /*
2279  * C3.3.10 Load/store (register offset)
2280  *
2281  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2282  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2283  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2284  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2285  *
2286  * For non-vector:
2287  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2288  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2289  * For vector:
2290  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2291  *   opc<0>: 0 -> store, 1 -> load
2292  * V: 1 -> vector/simd
2293  * opt: extend encoding (see DecodeRegExtend)
2294  * S: if S=1 then scale (essentially index by sizeof(size))
2295  * Rt: register to transfer into/out of
2296  * Rn: address register or SP for base
2297  * Rm: offset register or ZR for offset
2298  */
2299 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2300                                    int opc,
2301                                    int size,
2302                                    int rt,
2303                                    bool is_vector)
2304 {
2305     int rn = extract32(insn, 5, 5);
2306     int shift = extract32(insn, 12, 1);
2307     int rm = extract32(insn, 16, 5);
2308     int opt = extract32(insn, 13, 3);
2309     bool is_signed = false;
2310     bool is_store = false;
2311     bool is_extended = false;
2312
2313     TCGv_i64 tcg_rm;
2314     TCGv_i64 tcg_addr;
2315
2316     if (extract32(opt, 1, 1) == 0) {
2317         unallocated_encoding(s);
2318         return;
2319     }
2320
2321     if (is_vector) {
2322         size |= (opc & 2) << 1;
2323         if (size > 4) {
2324             unallocated_encoding(s);
2325             return;
2326         }
2327         is_store = !extract32(opc, 0, 1);
2328         if (!fp_access_check(s)) {
2329             return;
2330         }
2331     } else {
2332         if (size == 3 && opc == 2) {
2333             /* PRFM - prefetch */
2334             return;
2335         }
2336         if (opc == 3 && size > 1) {
2337             unallocated_encoding(s);
2338             return;
2339         }
2340         is_store = (opc == 0);
2341         is_signed = extract32(opc, 1, 1);
2342         is_extended = (size < 3) && extract32(opc, 0, 1);
2343     }
2344
2345     if (rn == 31) {
2346         gen_check_sp_alignment(s);
2347     }
2348     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2349
2350     tcg_rm = read_cpu_reg(s, rm, 1);
2351     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2352
2353     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2354
2355     if (is_vector) {
2356         if (is_store) {
2357             do_fp_st(s, rt, tcg_addr, size);
2358         } else {
2359             do_fp_ld(s, rt, tcg_addr, size);
2360         }
2361     } else {
2362         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2363         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2364         if (is_store) {
2365             do_gpr_st(s, tcg_rt, tcg_addr, size,
2366                       true, rt, iss_sf, false);
2367         } else {
2368             do_gpr_ld(s, tcg_rt, tcg_addr, size,
2369                       is_signed, is_extended,
2370                       true, rt, iss_sf, false);
2371         }
2372     }
2373 }
2374
2375 /*
2376  * C3.3.13 Load/store (unsigned immediate)
2377  *
 * 31 30 29   27  26 25 24 23 22 21        10 9     5 4    0
2379  * +----+-------+---+-----+-----+------------+-------+------+
2380  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2381  * +----+-------+---+-----+-----+------------+-------+------+
2382  *
2383  * For non-vector:
2384  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2385  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2386  * For vector:
2387  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2388  *   opc<0>: 0 -> store, 1 -> load
2389  * Rn: base address register (inc SP)
2390  * Rt: target register
2391  */
2392 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2393                                         int opc,
2394                                         int size,
2395                                         int rt,
2396                                         bool is_vector)
2397 {
2398     int rn = extract32(insn, 5, 5);
2399     unsigned int imm12 = extract32(insn, 10, 12);
2400     unsigned int offset;
2401
2402     TCGv_i64 tcg_addr;
2403
2404     bool is_store;
2405     bool is_signed = false;
2406     bool is_extended = false;
2407
2408     if (is_vector) {
2409         size |= (opc & 2) << 1;
2410         if (size > 4) {
2411             unallocated_encoding(s);
2412             return;
2413         }
2414         is_store = !extract32(opc, 0, 1);
2415         if (!fp_access_check(s)) {
2416             return;
2417         }
2418     } else {
2419         if (size == 3 && opc == 2) {
2420             /* PRFM - prefetch */
2421             return;
2422         }
2423         if (opc == 3 && size > 1) {
2424             unallocated_encoding(s);
2425             return;
2426         }
2427         is_store = (opc == 0);
2428         is_signed = extract32(opc, 1, 1);
2429         is_extended = (size < 3) && extract32(opc, 0, 1);
2430     }
2431
2432     if (rn == 31) {
2433         gen_check_sp_alignment(s);
2434     }
2435     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2436     offset = imm12 << size;
2437     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2438
2439     if (is_vector) {
2440         if (is_store) {
2441             do_fp_st(s, rt, tcg_addr, size);
2442         } else {
2443             do_fp_ld(s, rt, tcg_addr, size);
2444         }
2445     } else {
2446         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2447         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2448         if (is_store) {
2449             do_gpr_st(s, tcg_rt, tcg_addr, size,
2450                       true, rt, iss_sf, false);
2451         } else {
2452             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2453                       true, rt, iss_sf, false);
2454         }
2455     }
2456 }
2457
2458 /* Load/store register (all forms) */
2459 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2460 {
2461     int rt = extract32(insn, 0, 5);
2462     int opc = extract32(insn, 22, 2);
2463     bool is_vector = extract32(insn, 26, 1);
2464     int size = extract32(insn, 30, 2);
2465
2466     switch (extract32(insn, 24, 2)) {
2467     case 0:
2468         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2469             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2470         } else {
2471             /* Load/store register (unscaled immediate)
2472              * Load/store immediate pre/post-indexed
2473              * Load/store register unprivileged
2474              */
2475             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2476         }
2477         break;
2478     case 1:
2479         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2480         break;
2481     default:
2482         unallocated_encoding(s);
2483         break;
2484     }
2485 }
2486
2487 /* C3.3.1 AdvSIMD load/store multiple structures
2488  *
2489  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2490  * +---+---+---------------+---+-------------+--------+------+------+------+
2491  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2492  * +---+---+---------------+---+-------------+--------+------+------+------+
2493  *
2494  * C3.3.2 AdvSIMD load/store multiple structures (post-indexed)
2495  *
2496  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2497  * +---+---+---------------+---+---+---------+--------+------+------+------+
2498  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2499  * +---+---+---------------+---+---+---------+--------+------+------+------+
2500  *
2501  * Rt: first (or only) SIMD&FP register to be transferred
2502  * Rn: base address or SP
2503  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2504  */
2505 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2506 {
2507     int rt = extract32(insn, 0, 5);
2508     int rn = extract32(insn, 5, 5);
2509     int size = extract32(insn, 10, 2);
2510     int opcode = extract32(insn, 12, 4);
2511     bool is_store = !extract32(insn, 22, 1);
2512     bool is_postidx = extract32(insn, 23, 1);
2513     bool is_q = extract32(insn, 30, 1);
2514     TCGv_i64 tcg_addr, tcg_rn;
2515
2516     int ebytes = 1 << size;
2517     int elements = (is_q ? 128 : 64) / (8 << size);
2518     int rpt;    /* num iterations */
2519     int selem;  /* structure elements */
2520     int r;
2521
2522     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2523         unallocated_encoding(s);
2524         return;
2525     }
2526
2527     /* From the shared decode logic */
2528     switch (opcode) {
2529     case 0x0:
2530         rpt = 1;
2531         selem = 4;
2532         break;
2533     case 0x2:
2534         rpt = 4;
2535         selem = 1;
2536         break;
2537     case 0x4:
2538         rpt = 1;
2539         selem = 3;
2540         break;
2541     case 0x6:
2542         rpt = 3;
2543         selem = 1;
2544         break;
2545     case 0x7:
2546         rpt = 1;
2547         selem = 1;
2548         break;
2549     case 0x8:
2550         rpt = 1;
2551         selem = 2;
2552         break;
2553     case 0xa:
2554         rpt = 2;
2555         selem = 1;
2556         break;
2557     default:
2558         unallocated_encoding(s);
2559         return;
2560     }
2561
2562     if (size == 3 && !is_q && selem != 1) {
2563         /* reserved */
2564         unallocated_encoding(s);
2565         return;
2566     }
2567
2568     if (!fp_access_check(s)) {
2569         return;
2570     }
2571
2572     if (rn == 31) {
2573         gen_check_sp_alignment(s);
2574     }
2575
2576     tcg_rn = cpu_reg_sp(s, rn);
2577     tcg_addr = tcg_temp_new_i64();
2578     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2579
2580     for (r = 0; r < rpt; r++) {
2581         int e;
2582         for (e = 0; e < elements; e++) {
2583             int tt = (rt + r) % 32;
2584             int xs;
2585             for (xs = 0; xs < selem; xs++) {
2586                 if (is_store) {
2587                     do_vec_st(s, tt, e, tcg_addr, size);
2588                 } else {
2589                     do_vec_ld(s, tt, e, tcg_addr, size);
2590
2591                     /* For non-quad operations, setting a slice of the low
2592                      * 64 bits of the register clears the high 64 bits (in
2593                      * the ARM ARM pseudocode this is implicit in the fact
2594                      * that 'rval' is a 64 bit wide variable). We optimize
2595                      * by noticing that we only need to do this the first
2596                      * time we touch a register.
2597                      */
2598                     if (!is_q && e == 0 && (r == 0 || xs == selem - 1)) {
2599                         clear_vec_high(s, tt);
2600                     }
2601                 }
2602                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2603                 tt = (tt + 1) % 32;
2604             }
2605         }
2606     }
2607
2608     if (is_postidx) {
2609         int rm = extract32(insn, 16, 5);
2610         if (rm == 31) {
2611             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2612         } else {
2613             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2614         }
2615     }
2616     tcg_temp_free_i64(tcg_addr);
2617 }
2618
2619 /* C3.3.3 AdvSIMD load/store single structure
2620  *
2621  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2622  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2623  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2624  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2625  *
2626  * C3.3.4 AdvSIMD load/store single structure (post-indexed)
2627  *
2628  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2629  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2630  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2631  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2632  *
2633  * Rt: first (or only) SIMD&FP register to be transferred
2634  * Rn: base address or SP
2635  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2636  * index = encoded in Q:S:size dependent on size
2637  *
2638  * lane_size = encoded in R, opc
2639  * transfer width = encoded in opc, S, size
2640  */
2641 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2642 {
2643     int rt = extract32(insn, 0, 5);
2644     int rn = extract32(insn, 5, 5);
2645     int size = extract32(insn, 10, 2);
2646     int S = extract32(insn, 12, 1);
2647     int opc = extract32(insn, 13, 3);
2648     int R = extract32(insn, 21, 1);
2649     int is_load = extract32(insn, 22, 1);
2650     int is_postidx = extract32(insn, 23, 1);
2651     int is_q = extract32(insn, 30, 1);
2652
2653     int scale = extract32(opc, 1, 2);
2654     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2655     bool replicate = false;
2656     int index = is_q << 3 | S << 2 | size;
2657     int ebytes, xs;
2658     TCGv_i64 tcg_addr, tcg_rn;
2659
2660     switch (scale) {
2661     case 3:
2662         if (!is_load || S) {
2663             unallocated_encoding(s);
2664             return;
2665         }
2666         scale = size;
2667         replicate = true;
2668         break;
2669     case 0:
2670         break;
2671     case 1:
2672         if (extract32(size, 0, 1)) {
2673             unallocated_encoding(s);
2674             return;
2675         }
2676         index >>= 1;
2677         break;
2678     case 2:
2679         if (extract32(size, 1, 1)) {
2680             unallocated_encoding(s);
2681             return;
2682         }
2683         if (!extract32(size, 0, 1)) {
2684             index >>= 2;
2685         } else {
2686             if (S) {
2687                 unallocated_encoding(s);
2688                 return;
2689             }
2690             index >>= 3;
2691             scale = 3;
2692         }
2693         break;
2694     default:
2695         g_assert_not_reached();
2696     }
2697
2698     if (!fp_access_check(s)) {
2699         return;
2700     }
2701
2702     ebytes = 1 << scale;
2703
2704     if (rn == 31) {
2705         gen_check_sp_alignment(s);
2706     }
2707
2708     tcg_rn = cpu_reg_sp(s, rn);
2709     tcg_addr = tcg_temp_new_i64();
2710     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2711
2712     for (xs = 0; xs < selem; xs++) {
2713         if (replicate) {
2714             /* Load and replicate to all elements */
2715             uint64_t mulconst;
2716             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2717
2718             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2719                                 get_mem_index(s), s->be_data + scale);
2720             switch (scale) {
2721             case 0:
2722                 mulconst = 0x0101010101010101ULL;
2723                 break;
2724             case 1:
2725                 mulconst = 0x0001000100010001ULL;
2726                 break;
2727             case 2:
2728                 mulconst = 0x0000000100000001ULL;
2729                 break;
2730             case 3:
2731                 mulconst = 0;
2732                 break;
2733             default:
2734                 g_assert_not_reached();
2735             }
2736             if (mulconst) {
2737                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2738             }
2739             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2740             if (is_q) {
2741                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2742             } else {
2743                 clear_vec_high(s, rt);
2744             }
2745             tcg_temp_free_i64(tcg_tmp);
2746         } else {
2747             /* Load/store one element per register */
2748             if (is_load) {
2749                 do_vec_ld(s, rt, index, tcg_addr, s->be_data + scale);
2750             } else {
2751                 do_vec_st(s, rt, index, tcg_addr, s->be_data + scale);
2752             }
2753         }
2754         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2755         rt = (rt + 1) % 32;
2756     }
2757
2758     if (is_postidx) {
2759         int rm = extract32(insn, 16, 5);
2760         if (rm == 31) {
2761             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2762         } else {
2763             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2764         }
2765     }
2766     tcg_temp_free_i64(tcg_addr);
2767 }
2768
2769 /* C3.3 Loads and stores */
2770 static void disas_ldst(DisasContext *s, uint32_t insn)
2771 {
2772     switch (extract32(insn, 24, 6)) {
2773     case 0x08: /* Load/store exclusive */
2774         disas_ldst_excl(s, insn);
2775         break;
2776     case 0x18: case 0x1c: /* Load register (literal) */
2777         disas_ld_lit(s, insn);
2778         break;
2779     case 0x28: case 0x29:
2780     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
2781         disas_ldst_pair(s, insn);
2782         break;
2783     case 0x38: case 0x39:
2784     case 0x3c: case 0x3d: /* Load/store register (all forms) */
2785         disas_ldst_reg(s, insn);
2786         break;
2787     case 0x0c: /* AdvSIMD load/store multiple structures */
2788         disas_ldst_multiple_struct(s, insn);
2789         break;
2790     case 0x0d: /* AdvSIMD load/store single structure */
2791         disas_ldst_single_struct(s, insn);
2792         break;
2793     default:
2794         unallocated_encoding(s);
2795         break;
2796     }
2797 }
2798
2799 /* C3.4.6 PC-rel. addressing
2800  *   31  30   29 28       24 23                5 4    0
2801  * +----+-------+-----------+-------------------+------+
2802  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
2803  * +----+-------+-----------+-------------------+------+
2804  */
2805 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
2806 {
2807     unsigned int page, rd;
2808     uint64_t base;
2809     uint64_t offset;
2810
2811     page = extract32(insn, 31, 1);
2812     /* SignExtend(immhi:immlo) -> offset */
2813     offset = sextract64(insn, 5, 19);
2814     offset = offset << 2 | extract32(insn, 29, 2);
2815     rd = extract32(insn, 0, 5);
2816     base = s->pc - 4;
2817
2818     if (page) {
2819         /* ADRP (page based) */
2820         base &= ~0xfff;
2821         offset <<= 12;
2822     }
2823
2824     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
2825 }
2826
2827 /*
2828  * C3.4.1 Add/subtract (immediate)
2829  *
2830  *  31 30 29 28       24 23 22 21         10 9   5 4   0
2831  * +--+--+--+-----------+-----+-------------+-----+-----+
2832  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
2833  * +--+--+--+-----------+-----+-------------+-----+-----+
2834  *
2835  *    sf: 0 -> 32bit, 1 -> 64bit
2836  *    op: 0 -> add  , 1 -> sub
2837  *     S: 1 -> set flags
2838  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
2839  */
2840 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
2841 {
2842     int rd = extract32(insn, 0, 5);
2843     int rn = extract32(insn, 5, 5);
2844     uint64_t imm = extract32(insn, 10, 12);
2845     int shift = extract32(insn, 22, 2);
2846     bool setflags = extract32(insn, 29, 1);
2847     bool sub_op = extract32(insn, 30, 1);
2848     bool is_64bit = extract32(insn, 31, 1);
2849
2850     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2851     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
2852     TCGv_i64 tcg_result;
2853
2854     switch (shift) {
2855     case 0x0:
2856         break;
2857     case 0x1:
2858         imm <<= 12;
2859         break;
2860     default:
2861         unallocated_encoding(s);
2862         return;
2863     }
2864
2865     tcg_result = tcg_temp_new_i64();
2866     if (!setflags) {
2867         if (sub_op) {
2868             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
2869         } else {
2870             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
2871         }
2872     } else {
2873         TCGv_i64 tcg_imm = tcg_const_i64(imm);
2874         if (sub_op) {
2875             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2876         } else {
2877             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
2878         }
2879         tcg_temp_free_i64(tcg_imm);
2880     }
2881
2882     if (is_64bit) {
2883         tcg_gen_mov_i64(tcg_rd, tcg_result);
2884     } else {
2885         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
2886     }
2887
2888     tcg_temp_free_i64(tcg_result);
2889 }
2890
/* Replicate an e-bit wide value across a 64 bit integer.
 *
 * mask must hold the value in its bottom e bits with all higher bits
 * zero; e must be non-zero.  The populated width is doubled on each
 * step until it covers 64 bits, so the result is mask repeated in
 * every element of size e.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int width;

    assert(e != 0);
    for (width = e; width < 64; width *= 2) {
        mask |= mask << width;
    }
    return mask;
}
2904
/* Return a value with the bottom length bits set and all higher bits
 * clear.  length must satisfy 0 < length <= 64.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    unsigned int drop;

    assert(length > 0 && length <= 64);
    /* Shift an all-ones value right by the number of bits to clear */
    drop = 64 - length;
    return ~(uint64_t)0 >> drop;
}
2911
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * Returns false if immn/imms/immr form a reserved value (ie should cause
 * a guest UNDEF exception).  Returns true if they are valid, in which
 * case the decoded 64 bit pattern is stored through result.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    unsigned int esize, emask, run, rot;
    uint64_t elem;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* The element size is given by the highest set bit of
     * immn : NOT(imms).
     */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* This is the immn == 0, imms == 11111x (binary) case */
        return false;
    }
    esize = 1 << log2_esize;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;
    if (run == emask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Build one element: run+1 set bits, rotated right by rot within
     * the esize-bit wide element...
     */
    elem = bitmask64(run + 1);
    if (rot != 0) {
        elem = (elem >> rot) | (elem << (esize - rot));
        elem &= bitmask64(esize);
    }

    /* ...then replicate the element over the whole 64 bit value */
    *result = bitfield_replicate(elem, esize);
    return true;
}
2977
2978 /* C3.4.4 Logical (immediate)
2979  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
2980  * +----+-----+-------------+---+------+------+------+------+
2981  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
2982  * +----+-----+-------------+---+------+------+------+------+
2983  */
2984 static void disas_logic_imm(DisasContext *s, uint32_t insn)
2985 {
2986     unsigned int sf, opc, is_n, immr, imms, rn, rd;
2987     TCGv_i64 tcg_rd, tcg_rn;
2988     uint64_t wmask;
2989     bool is_and = false;
2990
2991     sf = extract32(insn, 31, 1);
2992     opc = extract32(insn, 29, 2);
2993     is_n = extract32(insn, 22, 1);
2994     immr = extract32(insn, 16, 6);
2995     imms = extract32(insn, 10, 6);
2996     rn = extract32(insn, 5, 5);
2997     rd = extract32(insn, 0, 5);
2998
2999     if (!sf && is_n) {
3000         unallocated_encoding(s);
3001         return;
3002     }
3003
3004     if (opc == 0x3) { /* ANDS */
3005         tcg_rd = cpu_reg(s, rd);
3006     } else {
3007         tcg_rd = cpu_reg_sp(s, rd);
3008     }
3009     tcg_rn = cpu_reg(s, rn);
3010
3011     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3012         /* some immediate field values are reserved */
3013         unallocated_encoding(s);
3014         return;
3015     }
3016
3017     if (!sf) {
3018         wmask &= 0xffffffff;
3019     }
3020
3021     switch (opc) {
3022     case 0x3: /* ANDS */
3023     case 0x0: /* AND */
3024         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3025         is_and = true;
3026         break;
3027     case 0x1: /* ORR */
3028         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3029         break;
3030     case 0x2: /* EOR */
3031         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3032         break;
3033     default:
3034         assert(FALSE); /* must handle all above */
3035         break;
3036     }
3037
3038     if (!sf && !is_and) {
3039         /* zero extend final result; we know we can skip this for AND
3040          * since the immediate had the high 32 bits clear.
3041          */
3042         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3043     }
3044
3045     if (opc == 3) { /* ANDS */
3046         gen_logic_CC(sf, tcg_rd);
3047     }
3048 }
3049
3050 /*
3051  * C3.4.5 Move wide (immediate)
3052  *
3053  *  31 30 29 28         23 22 21 20             5 4    0
3054  * +--+-----+-------------+-----+----------------+------+
3055  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3056  * +--+-----+-------------+-----+----------------+------+
3057  *
3058  * sf: 0 -> 32 bit, 1 -> 64 bit
3059  * opc: 00 -> N, 10 -> Z, 11 -> K
3060  * hw: shift/16 (0,16, and sf only 32, 48)
3061  */
3062 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3063 {
3064     int rd = extract32(insn, 0, 5);
3065     uint64_t imm = extract32(insn, 5, 16);
3066     int sf = extract32(insn, 31, 1);
3067     int opc = extract32(insn, 29, 2);
3068     int pos = extract32(insn, 21, 2) << 4;
3069     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3070     TCGv_i64 tcg_imm;
3071
3072     if (!sf && (pos >= 32)) {
3073         unallocated_encoding(s);
3074         return;
3075     }
3076
3077     switch (opc) {
3078     case 0: /* MOVN */
3079     case 2: /* MOVZ */
3080         imm <<= pos;
3081         if (opc == 0) {
3082             imm = ~imm;
3083         }
3084         if (!sf) {
3085             imm &= 0xffffffffu;
3086         }
3087         tcg_gen_movi_i64(tcg_rd, imm);
3088         break;
3089     case 3: /* MOVK */
3090         tcg_imm = tcg_const_i64(imm);
3091         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3092         tcg_temp_free_i64(tcg_imm);
3093         if (!sf) {
3094             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3095         }
3096         break;
3097     default:
3098         unallocated_encoding(s);
3099         break;
3100     }
3101 }
3102
3103 /* C3.4.2 Bitfield
3104  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3105  * +----+-----+-------------+---+------+------+------+------+
3106  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3107  * +----+-----+-------------+---+------+------+------+------+
3108  */
3109 static void disas_bitfield(DisasContext *s, uint32_t insn)
3110 {
3111     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3112     TCGv_i64 tcg_rd, tcg_tmp;
3113
3114     sf = extract32(insn, 31, 1);
3115     opc = extract32(insn, 29, 2);
3116     n = extract32(insn, 22, 1);
3117     ri = extract32(insn, 16, 6);
3118     si = extract32(insn, 10, 6);
3119     rn = extract32(insn, 5, 5);
3120     rd = extract32(insn, 0, 5);
3121     bitsize = sf ? 64 : 32;
3122
3123     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3124         unallocated_encoding(s);
3125         return;
3126     }
3127
3128     tcg_rd = cpu_reg(s, rd);
3129
3130     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3131        to be smaller than bitsize, we'll never reference data outside the
3132        low 32-bits anyway.  */
3133     tcg_tmp = read_cpu_reg(s, rn, 1);
3134
3135     /* Recognize the common aliases.  */
3136     if (opc == 0) { /* SBFM */
3137         if (ri == 0) {
3138             if (si == 7) { /* SXTB */
3139                 tcg_gen_ext8s_i64(tcg_rd, tcg_tmp);
3140                 goto done;
3141             } else if (si == 15) { /* SXTH */
3142                 tcg_gen_ext16s_i64(tcg_rd, tcg_tmp);
3143                 goto done;
3144             } else if (si == 31) { /* SXTW */
3145                 tcg_gen_ext32s_i64(tcg_rd, tcg_tmp);
3146                 goto done;
3147             }
3148         }
3149         if (si == 63 || (si == 31 && ri <= si)) { /* ASR */
3150             if (si == 31) {
3151                 tcg_gen_ext32s_i64(tcg_tmp, tcg_tmp);
3152             }
3153             tcg_gen_sari_i64(tcg_rd, tcg_tmp, ri);
3154             goto done;
3155         }
3156     } else if (opc == 2) { /* UBFM */
3157         if (ri == 0) { /* UXTB, UXTH, plus non-canonical AND */
3158             tcg_gen_andi_i64(tcg_rd, tcg_tmp, bitmask64(si + 1));
3159             return;
3160         }
3161         if (si == 63 || (si == 31 && ri <= si)) { /* LSR */
3162             if (si == 31) {
3163                 tcg_gen_ext32u_i64(tcg_tmp, tcg_tmp);
3164             }
3165             tcg_gen_shri_i64(tcg_rd, tcg_tmp, ri);
3166             return;
3167         }
3168         if (si + 1 == ri && si != bitsize - 1) { /* LSL */
3169             int shift = bitsize - 1 - si;
3170             tcg_gen_shli_i64(tcg_rd, tcg_tmp, shift);
3171             goto done;
3172         }
3173     }
3174
3175     if (opc != 1) { /* SBFM or UBFM */
3176         tcg_gen_movi_i64(tcg_rd, 0);
3177     }
3178
3179     /* do the bit move operation */
3180     if (si >= ri) {
3181         /* Wd<s-r:0> = Wn<s:r> */
3182         tcg_gen_shri_i64(tcg_tmp, tcg_tmp, ri);
3183         pos = 0;
3184         len = (si - ri) + 1;
3185     } else {
3186         /* Wd<32+s-r,32-r> = Wn<s:0> */
3187         pos = bitsize - ri;
3188         len = si + 1;
3189     }
3190
3191     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3192
3193     if (opc == 0) { /* SBFM - sign extend the destination field */
3194         tcg_gen_shli_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3195         tcg_gen_sari_i64(tcg_rd, tcg_rd, 64 - (pos + len));
3196     }
3197
3198  done:
3199     if (!sf) { /* zero extend final result */
3200         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3201     }
3202 }
3203
3204 /* C3.4.3 Extract
3205  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3206  * +----+------+-------------+---+----+------+--------+------+------+
3207  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3208  * +----+------+-------------+---+----+------+--------+------+------+
3209  */
3210 static void disas_extract(DisasContext *s, uint32_t insn)
3211 {
3212     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3213
3214     sf = extract32(insn, 31, 1);
3215     n = extract32(insn, 22, 1);
3216     rm = extract32(insn, 16, 5);
3217     imm = extract32(insn, 10, 6);
3218     rn = extract32(insn, 5, 5);
3219     rd = extract32(insn, 0, 5);
3220     op21 = extract32(insn, 29, 2);
3221     op0 = extract32(insn, 21, 1);
3222     bitsize = sf ? 64 : 32;
3223
3224     if (sf != n || op21 || op0 || imm >= bitsize) {
3225         unallocated_encoding(s);
3226     } else {
3227         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3228
3229         tcg_rd = cpu_reg(s, rd);
3230
3231         if (unlikely(imm == 0)) {
3232             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3233              * so an extract from bit 0 is a special case.
3234              */
3235             if (sf) {
3236                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3237             } else {
3238                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3239             }
3240         } else if (rm == rn) { /* ROR */
3241             tcg_rm = cpu_reg(s, rm);
3242             if (sf) {
3243                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3244             } else {
3245                 TCGv_i32 tmp = tcg_temp_new_i32();
3246                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3247                 tcg_gen_rotri_i32(tmp, tmp, imm);
3248                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3249                 tcg_temp_free_i32(tmp);
3250             }
3251         } else {
3252             tcg_rm = read_cpu_reg(s, rm, sf);
3253             tcg_rn = read_cpu_reg(s, rn, sf);
3254             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3255             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3256             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3257             if (!sf) {
3258                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3259             }
3260         }
3261     }
3262 }
3263
3264 /* C3.4 Data processing - immediate */
3265 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3266 {
3267     switch (extract32(insn, 23, 6)) {
3268     case 0x20: case 0x21: /* PC-rel. addressing */
3269         disas_pc_rel_adr(s, insn);
3270         break;
3271     case 0x22: case 0x23: /* Add/subtract (immediate) */
3272         disas_add_sub_imm(s, insn);
3273         break;
3274     case 0x24: /* Logical (immediate) */
3275         disas_logic_imm(s, insn);
3276         break;
3277     case 0x25: /* Move wide (immediate) */
3278         disas_movw_imm(s, insn);
3279         break;
3280     case 0x26: /* Bitfield */
3281         disas_bitfield(s, insn);
3282         break;
3283     case 0x27: /* Extract */
3284         disas_extract(s, insn);
3285         break;
3286     default:
3287         unallocated_encoding(s);
3288         break;
3289     }
3290 }
3291
3292 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3293  * Note that it is the caller's responsibility to ensure that the
3294  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3295  * mandated semantics for out of range shifts.
3296  */
3297 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3298                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3299 {
3300     switch (shift_type) {
3301     case A64_SHIFT_TYPE_LSL:
3302         tcg_gen_shl_i64(dst, src, shift_amount);
3303         break;
3304     case A64_SHIFT_TYPE_LSR:
3305         tcg_gen_shr_i64(dst, src, shift_amount);
3306         break;
3307     case A64_SHIFT_TYPE_ASR:
3308         if (!sf) {
3309             tcg_gen_ext32s_i64(dst, src);
3310         }
3311         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3312         break;
3313     case A64_SHIFT_TYPE_ROR:
3314         if (sf) {
3315             tcg_gen_rotr_i64(dst, src, shift_amount);
3316         } else {
3317             TCGv_i32 t0, t1;
3318             t0 = tcg_temp_new_i32();
3319             t1 = tcg_temp_new_i32();
3320             tcg_gen_extrl_i64_i32(t0, src);
3321             tcg_gen_extrl_i64_i32(t1, shift_amount);
3322             tcg_gen_rotr_i32(t0, t0, t1);
3323             tcg_gen_extu_i32_i64(dst, t0);
3324             tcg_temp_free_i32(t0);
3325             tcg_temp_free_i32(t1);
3326         }
3327         break;
3328     default:
3329         assert(FALSE); /* all shift types should be handled */
3330         break;
3331     }
3332
3333     if (!sf) { /* zero extend final result */
3334         tcg_gen_ext32u_i64(dst, dst);
3335     }
3336 }
3337
3338 /* Shift a TCGv src by immediate, put result in dst.
3339  * The shift amount must be in range (this should always be true as the
3340  * relevant instructions will UNDEF on bad shift immediates).
3341  */
3342 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3343                           enum a64_shift_type shift_type, unsigned int shift_i)
3344 {
3345     assert(shift_i < (sf ? 64 : 32));
3346
3347     if (shift_i == 0) {
3348         tcg_gen_mov_i64(dst, src);
3349     } else {
3350         TCGv_i64 shift_const;
3351
3352         shift_const = tcg_const_i64(shift_i);
3353         shift_reg(dst, src, sf, shift_type, shift_const);
3354         tcg_temp_free_i64(shift_const);
3355     }
3356 }
3357
3358 /* C3.5.10 Logical (shifted register)
3359  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3360  * +----+-----+-----------+-------+---+------+--------+------+------+
3361  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3362  * +----+-----+-----------+-------+---+------+--------+------+------+
3363  */
3364 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3365 {
3366     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3367     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3368
3369     sf = extract32(insn, 31, 1);
3370     opc = extract32(insn, 29, 2);
3371     shift_type = extract32(insn, 22, 2);
3372     invert = extract32(insn, 21, 1);
3373     rm = extract32(insn, 16, 5);
3374     shift_amount = extract32(insn, 10, 6);
3375     rn = extract32(insn, 5, 5);
3376     rd = extract32(insn, 0, 5);
3377
3378     if (!sf && (shift_amount & (1 << 5))) {
3379         unallocated_encoding(s);
3380         return;
3381     }
3382
3383     tcg_rd = cpu_reg(s, rd);
3384
3385     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3386         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3387          * register-register MOV and MVN, so it is worth special casing.
3388          */
3389         tcg_rm = cpu_reg(s, rm);
3390         if (invert) {
3391             tcg_gen_not_i64(tcg_rd, tcg_rm);
3392             if (!sf) {
3393                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3394             }
3395         } else {
3396             if (sf) {
3397                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3398             } else {
3399                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3400             }
3401         }
3402         return;
3403     }
3404
3405     tcg_rm = read_cpu_reg(s, rm, sf);
3406
3407     if (shift_amount) {
3408         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3409     }
3410
3411     tcg_rn = cpu_reg(s, rn);
3412
3413     switch (opc | (invert << 2)) {
3414     case 0: /* AND */
3415     case 3: /* ANDS */
3416         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3417         break;
3418     case 1: /* ORR */
3419         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3420         break;
3421     case 2: /* EOR */
3422         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3423         break;
3424     case 4: /* BIC */
3425     case 7: /* BICS */
3426         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3427         break;
3428     case 5: /* ORN */
3429         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3430         break;
3431     case 6: /* EON */
3432         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3433         break;
3434     default:
3435         assert(FALSE);
3436         break;
3437     }
3438
3439     if (!sf) {
3440         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3441     }
3442
3443     if (opc == 3) {
3444         gen_logic_CC(sf, tcg_rd);
3445     }
3446 }
3447
3448 /*
3449  * C3.5.1 Add/subtract (extended register)
3450  *
3451  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3452  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3453  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3454  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3455  *
3456  *  sf: 0 -> 32bit, 1 -> 64bit
3457  *  op: 0 -> add  , 1 -> sub
3458  *   S: 1 -> set flags
3459  * opt: 00
3460  * option: extension type (see DecodeRegExtend)
3461  * imm3: optional shift to Rm
3462  *
3463  * Rd = Rn + LSL(extend(Rm), amount)
3464  */
3465 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3466 {
3467     int rd = extract32(insn, 0, 5);
3468     int rn = extract32(insn, 5, 5);
3469     int imm3 = extract32(insn, 10, 3);
3470     int option = extract32(insn, 13, 3);
3471     int rm = extract32(insn, 16, 5);
3472     bool setflags = extract32(insn, 29, 1);
3473     bool sub_op = extract32(insn, 30, 1);
3474     bool sf = extract32(insn, 31, 1);
3475
3476     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3477     TCGv_i64 tcg_rd;
3478     TCGv_i64 tcg_result;
3479
3480     if (imm3 > 4) {
3481         unallocated_encoding(s);
3482         return;
3483     }
3484
3485     /* non-flag setting ops may use SP */
3486     if (!setflags) {
3487         tcg_rd = cpu_reg_sp(s, rd);
3488     } else {
3489         tcg_rd = cpu_reg(s, rd);
3490     }
3491     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3492
3493     tcg_rm = read_cpu_reg(s, rm, sf);
3494     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3495
3496     tcg_result = tcg_temp_new_i64();
3497
3498     if (!setflags) {
3499         if (sub_op) {
3500             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3501         } else {
3502             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3503         }
3504     } else {
3505         if (sub_op) {
3506             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3507         } else {
3508             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3509         }
3510     }
3511
3512     if (sf) {
3513         tcg_gen_mov_i64(tcg_rd, tcg_result);
3514     } else {
3515         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3516     }
3517
3518     tcg_temp_free_i64(tcg_result);
3519 }
3520
3521 /*
3522  * C3.5.2 Add/subtract (shifted register)
3523  *
3524  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3525  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3526  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3527  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3528  *
3529  *    sf: 0 -> 32bit, 1 -> 64bit
3530  *    op: 0 -> add  , 1 -> sub
3531  *     S: 1 -> set flags
3532  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3533  *  imm6: Shift amount to apply to Rm before the add/sub
3534  */
3535 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3536 {
3537     int rd = extract32(insn, 0, 5);
3538     int rn = extract32(insn, 5, 5);
3539     int imm6 = extract32(insn, 10, 6);
3540     int rm = extract32(insn, 16, 5);
3541     int shift_type = extract32(insn, 22, 2);
3542     bool setflags = extract32(insn, 29, 1);
3543     bool sub_op = extract32(insn, 30, 1);
3544     bool sf = extract32(insn, 31, 1);
3545
3546     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3547     TCGv_i64 tcg_rn, tcg_rm;
3548     TCGv_i64 tcg_result;
3549
3550     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3551         unallocated_encoding(s);
3552         return;
3553     }
3554
3555     tcg_rn = read_cpu_reg(s, rn, sf);
3556     tcg_rm = read_cpu_reg(s, rm, sf);
3557
3558     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3559
3560     tcg_result = tcg_temp_new_i64();
3561
3562     if (!setflags) {
3563         if (sub_op) {
3564             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3565         } else {
3566             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3567         }
3568     } else {
3569         if (sub_op) {
3570             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3571         } else {
3572             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3573         }
3574     }
3575
3576     if (sf) {
3577         tcg_gen_mov_i64(tcg_rd, tcg_result);
3578     } else {
3579         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3580     }
3581
3582     tcg_temp_free_i64(tcg_result);
3583 }
3584
3585 /* C3.5.9 Data-processing (3 source)
3586
3587    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3588   +--+------+-----------+------+------+----+------+------+------+
3589   |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3590   +--+------+-----------+------+------+----+------+------+------+
3591
3592  */
3593 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3594 {
3595     int rd = extract32(insn, 0, 5);
3596     int rn = extract32(insn, 5, 5);
3597     int ra = extract32(insn, 10, 5);
3598     int rm = extract32(insn, 16, 5);
3599     int op_id = (extract32(insn, 29, 3) << 4) |
3600         (extract32(insn, 21, 3) << 1) |
3601         extract32(insn, 15, 1);
3602     bool sf = extract32(insn, 31, 1);
3603     bool is_sub = extract32(op_id, 0, 1);
3604     bool is_high = extract32(op_id, 2, 1);
3605     bool is_signed = false;
3606     TCGv_i64 tcg_op1;
3607     TCGv_i64 tcg_op2;
3608     TCGv_i64 tcg_tmp;
3609
3610     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3611     switch (op_id) {
3612     case 0x42: /* SMADDL */
3613     case 0x43: /* SMSUBL */
3614     case 0x44: /* SMULH */
3615         is_signed = true;
3616         break;
3617     case 0x0: /* MADD (32bit) */
3618     case 0x1: /* MSUB (32bit) */
3619     case 0x40: /* MADD (64bit) */
3620     case 0x41: /* MSUB (64bit) */
3621     case 0x4a: /* UMADDL */
3622     case 0x4b: /* UMSUBL */
3623     case 0x4c: /* UMULH */
3624         break;
3625     default:
3626         unallocated_encoding(s);
3627         return;
3628     }
3629
3630     if (is_high) {
3631         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3632         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3633         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3634         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3635
3636         if (is_signed) {
3637             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3638         } else {
3639             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3640         }
3641
3642         tcg_temp_free_i64(low_bits);
3643         return;
3644     }
3645
3646     tcg_op1 = tcg_temp_new_i64();
3647     tcg_op2 = tcg_temp_new_i64();
3648     tcg_tmp = tcg_temp_new_i64();
3649
3650     if (op_id < 0x42) {
3651         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3652         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3653     } else {
3654         if (is_signed) {
3655             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3656             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3657         } else {
3658             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3659             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3660         }
3661     }
3662
3663     if (ra == 31 && !is_sub) {
3664         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3665         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3666     } else {
3667         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3668         if (is_sub) {
3669             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3670         } else {
3671             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3672         }
3673     }
3674
3675     if (!sf) {
3676         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3677     }
3678
3679     tcg_temp_free_i64(tcg_op1);
3680     tcg_temp_free_i64(tcg_op2);
3681     tcg_temp_free_i64(tcg_tmp);
3682 }
3683
3684 /* C3.5.3 - Add/subtract (with carry)
3685  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3686  * +--+--+--+------------------------+------+---------+------+-----+
3687  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3688  * +--+--+--+------------------------+------+---------+------+-----+
3689  *                                            [000000]
3690  */
3691
3692 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3693 {
3694     unsigned int sf, op, setflags, rm, rn, rd;
3695     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3696
3697     if (extract32(insn, 10, 6) != 0) {
3698         unallocated_encoding(s);
3699         return;
3700     }
3701
3702     sf = extract32(insn, 31, 1);
3703     op = extract32(insn, 30, 1);
3704     setflags = extract32(insn, 29, 1);
3705     rm = extract32(insn, 16, 5);
3706     rn = extract32(insn, 5, 5);
3707     rd = extract32(insn, 0, 5);
3708
3709     tcg_rd = cpu_reg(s, rd);
3710     tcg_rn = cpu_reg(s, rn);
3711
3712     if (op) {
3713         tcg_y = new_tmp_a64(s);
3714         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3715     } else {
3716         tcg_y = cpu_reg(s, rm);
3717     }
3718
3719     if (setflags) {
3720         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3721     } else {
3722         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3723     }
3724 }
3725
3726 /* C3.5.4 - C3.5.5 Conditional compare (immediate / register)
3727  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3728  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3729  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3730  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3731  *        [1]                             y                [0]       [0]
3732  */
3733 static void disas_cc(DisasContext *s, uint32_t insn)
3734 {
3735     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3736     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3737     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3738     DisasCompare c;
3739
3740     if (!extract32(insn, 29, 1)) {
3741         unallocated_encoding(s);
3742         return;
3743     }
3744     if (insn & (1 << 10 | 1 << 4)) {
3745         unallocated_encoding(s);
3746         return;
3747     }
3748     sf = extract32(insn, 31, 1);
3749     op = extract32(insn, 30, 1);
3750     is_imm = extract32(insn, 11, 1);
3751     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3752     cond = extract32(insn, 12, 4);
3753     rn = extract32(insn, 5, 5);
3754     nzcv = extract32(insn, 0, 4);
3755
3756     /* Set T0 = !COND.  */
3757     tcg_t0 = tcg_temp_new_i32();
3758     arm_test_cc(&c, cond);
3759     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3760     arm_free_cc(&c);
3761
3762     /* Load the arguments for the new comparison.  */
3763     if (is_imm) {
3764         tcg_y = new_tmp_a64(s);
3765         tcg_gen_movi_i64(tcg_y, y);
3766     } else {
3767         tcg_y = cpu_reg(s, y);
3768     }
3769     tcg_rn = cpu_reg(s, rn);
3770
3771     /* Set the flags for the new comparison.  */
3772     tcg_tmp = tcg_temp_new_i64();
3773     if (op) {
3774         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3775     } else {
3776         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3777     }
3778     tcg_temp_free_i64(tcg_tmp);
3779
3780     /* If COND was false, force the flags to #nzcv.  Compute two masks
3781      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
3782      * For tcg hosts that support ANDC, we can make do with just T1.
3783      * In either case, allow the tcg optimizer to delete any unused mask.
3784      */
3785     tcg_t1 = tcg_temp_new_i32();
3786     tcg_t2 = tcg_temp_new_i32();
3787     tcg_gen_neg_i32(tcg_t1, tcg_t0);
3788     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
3789
3790     if (nzcv & 8) { /* N */
3791         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
3792     } else {
3793         if (TCG_TARGET_HAS_andc_i32) {
3794             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
3795         } else {
3796             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
3797         }
3798     }
3799     if (nzcv & 4) { /* Z */
3800         if (TCG_TARGET_HAS_andc_i32) {
3801             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
3802         } else {
3803             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
3804         }
3805     } else {
3806         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
3807     }
3808     if (nzcv & 2) { /* C */
3809         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
3810     } else {
3811         if (TCG_TARGET_HAS_andc_i32) {
3812             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
3813         } else {
3814             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
3815         }
3816     }
3817     if (nzcv & 1) { /* V */
3818         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
3819     } else {
3820         if (TCG_TARGET_HAS_andc_i32) {
3821             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
3822         } else {
3823             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
3824         }
3825     }
3826     tcg_temp_free_i32(tcg_t0);
3827     tcg_temp_free_i32(tcg_t1);
3828     tcg_temp_free_i32(tcg_t2);
3829 }
3830
3831 /* C3.5.6 Conditional select
3832  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
3833  * +----+----+---+-----------------+------+------+-----+------+------+
3834  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
3835  * +----+----+---+-----------------+------+------+-----+------+------+
3836  */
3837 static void disas_cond_select(DisasContext *s, uint32_t insn)
3838 {
3839     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
3840     TCGv_i64 tcg_rd, zero;
3841     DisasCompare64 c;
3842
3843     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
3844         /* S == 1 or op2<1> == 1 */
3845         unallocated_encoding(s);
3846         return;
3847     }
3848     sf = extract32(insn, 31, 1);
3849     else_inv = extract32(insn, 30, 1);
3850     rm = extract32(insn, 16, 5);
3851     cond = extract32(insn, 12, 4);
3852     else_inc = extract32(insn, 10, 1);
3853     rn = extract32(insn, 5, 5);
3854     rd = extract32(insn, 0, 5);
3855
3856     tcg_rd = cpu_reg(s, rd);
3857
3858     a64_test_cc(&c, cond);
3859     zero = tcg_const_i64(0);
3860
3861     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
3862         /* CSET & CSETM.  */
3863         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
3864         if (else_inv) {
3865             tcg_gen_neg_i64(tcg_rd, tcg_rd);
3866         }
3867     } else {
3868         TCGv_i64 t_true = cpu_reg(s, rn);
3869         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
3870         if (else_inv && else_inc) {
3871             tcg_gen_neg_i64(t_false, t_false);
3872         } else if (else_inv) {
3873             tcg_gen_not_i64(t_false, t_false);
3874         } else if (else_inc) {
3875             tcg_gen_addi_i64(t_false, t_false, 1);
3876         }
3877         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
3878     }
3879
3880     tcg_temp_free_i64(zero);
3881     a64_free_cc(&c);
3882
3883     if (!sf) {
3884         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3885     }
3886 }
3887
3888 static void handle_clz(DisasContext *s, unsigned int sf,
3889                        unsigned int rn, unsigned int rd)
3890 {
3891     TCGv_i64 tcg_rd, tcg_rn;
3892     tcg_rd = cpu_reg(s, rd);
3893     tcg_rn = cpu_reg(s, rn);
3894
3895     if (sf) {
3896         gen_helper_clz64(tcg_rd, tcg_rn);
3897     } else {
3898         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3899         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3900         gen_helper_clz(tcg_tmp32, tcg_tmp32);
3901         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3902         tcg_temp_free_i32(tcg_tmp32);
3903     }
3904 }
3905
3906 static void handle_cls(DisasContext *s, unsigned int sf,
3907                        unsigned int rn, unsigned int rd)
3908 {
3909     TCGv_i64 tcg_rd, tcg_rn;
3910     tcg_rd = cpu_reg(s, rd);
3911     tcg_rn = cpu_reg(s, rn);
3912
3913     if (sf) {
3914         gen_helper_cls64(tcg_rd, tcg_rn);
3915     } else {
3916         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3917         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3918         gen_helper_cls32(tcg_tmp32, tcg_tmp32);
3919         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3920         tcg_temp_free_i32(tcg_tmp32);
3921     }
3922 }
3923
3924 static void handle_rbit(DisasContext *s, unsigned int sf,
3925                         unsigned int rn, unsigned int rd)
3926 {
3927     TCGv_i64 tcg_rd, tcg_rn;
3928     tcg_rd = cpu_reg(s, rd);
3929     tcg_rn = cpu_reg(s, rn);
3930
3931     if (sf) {
3932         gen_helper_rbit64(tcg_rd, tcg_rn);
3933     } else {
3934         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
3935         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
3936         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
3937         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
3938         tcg_temp_free_i32(tcg_tmp32);
3939     }
3940 }
3941
3942 /* C5.6.149 REV with sf==1, opcode==3 ("REV64") */
3943 static void handle_rev64(DisasContext *s, unsigned int sf,
3944                          unsigned int rn, unsigned int rd)
3945 {
3946     if (!sf) {
3947         unallocated_encoding(s);
3948         return;
3949     }
3950     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
3951 }
3952
3953 /* C5.6.149 REV with sf==0, opcode==2
3954  * C5.6.151 REV32 (sf==1, opcode==2)
3955  */
3956 static void handle_rev32(DisasContext *s, unsigned int sf,
3957                          unsigned int rn, unsigned int rd)
3958 {
3959     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3960
3961     if (sf) {
3962         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3963         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3964
3965         /* bswap32_i64 requires zero high word */
3966         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
3967         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
3968         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3969         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
3970         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
3971
3972         tcg_temp_free_i64(tcg_tmp);
3973     } else {
3974         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
3975         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
3976     }
3977 }
3978
3979 /* C5.6.150 REV16 (opcode==1) */
3980 static void handle_rev16(DisasContext *s, unsigned int sf,
3981                          unsigned int rn, unsigned int rd)
3982 {
3983     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3984     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3985     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
3986
3987     tcg_gen_andi_i64(tcg_tmp, tcg_rn, 0xffff);
3988     tcg_gen_bswap16_i64(tcg_rd, tcg_tmp);
3989
3990     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 16);
3991     tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3992     tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3993     tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 16, 16);
3994
3995     if (sf) {
3996         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
3997         tcg_gen_andi_i64(tcg_tmp, tcg_tmp, 0xffff);
3998         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
3999         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 32, 16);
4000
4001         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 48);
4002         tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
4003         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, 48, 16);
4004     }
4005
4006     tcg_temp_free_i64(tcg_tmp);
4007 }
4008
4009 /* C3.5.7 Data-processing (1 source)
4010  *   31  30  29  28             21 20     16 15    10 9    5 4    0
4011  * +----+---+---+-----------------+---------+--------+------+------+
4012  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4013  * +----+---+---+-----------------+---------+--------+------+------+
4014  */
4015 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4016 {
4017     unsigned int sf, opcode, rn, rd;
4018
4019     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4020         unallocated_encoding(s);
4021         return;
4022     }
4023
4024     sf = extract32(insn, 31, 1);
4025     opcode = extract32(insn, 10, 6);
4026     rn = extract32(insn, 5, 5);
4027     rd = extract32(insn, 0, 5);
4028
4029     switch (opcode) {
4030     case 0: /* RBIT */
4031         handle_rbit(s, sf, rn, rd);
4032         break;
4033     case 1: /* REV16 */
4034         handle_rev16(s, sf, rn, rd);
4035         break;
4036     case 2: /* REV32 */
4037         handle_rev32(s, sf, rn, rd);
4038         break;
4039     case 3: /* REV64 */
4040         handle_rev64(s, sf, rn, rd);
4041         break;
4042     case 4: /* CLZ */
4043         handle_clz(s, sf, rn, rd);
4044         break;
4045     case 5: /* CLS */
4046         handle_cls(s, sf, rn, rd);
4047         break;
4048     }
4049 }
4050
4051 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4052                        unsigned int rm, unsigned int rn, unsigned int rd)
4053 {
4054     TCGv_i64 tcg_n, tcg_m, tcg_rd;
4055     tcg_rd = cpu_reg(s, rd);
4056
4057     if (!sf && is_signed) {
4058         tcg_n = new_tmp_a64(s);
4059         tcg_m = new_tmp_a64(s);
4060         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4061         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4062     } else {
4063         tcg_n = read_cpu_reg(s, rn, sf);
4064         tcg_m = read_cpu_reg(s, rm, sf);
4065     }
4066
4067     if (is_signed) {
4068         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4069     } else {
4070         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4071     }
4072
4073     if (!sf) { /* zero extend final result */
4074         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4075     }
4076 }
4077
4078 /* C5.6.115 LSLV, C5.6.118 LSRV, C5.6.17 ASRV, C5.6.154 RORV */
4079 static void handle_shift_reg(DisasContext *s,
4080                              enum a64_shift_type shift_type, unsigned int sf,
4081                              unsigned int rm, unsigned int rn, unsigned int rd)
4082 {
4083     TCGv_i64 tcg_shift = tcg_temp_new_i64();
4084     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4085     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4086
4087     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4088     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4089     tcg_temp_free_i64(tcg_shift);
4090 }
4091
4092 /* CRC32[BHWX], CRC32C[BHWX] */
4093 static void handle_crc32(DisasContext *s,
4094                          unsigned int sf, unsigned int sz, bool crc32c,
4095                          unsigned int rm, unsigned int rn, unsigned int rd)
4096 {
4097     TCGv_i64 tcg_acc, tcg_val;
4098     TCGv_i32 tcg_bytes;
4099
4100     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4101         || (sf == 1 && sz != 3)
4102         || (sf == 0 && sz == 3)) {
4103         unallocated_encoding(s);
4104         return;
4105     }
4106
4107     if (sz == 3) {
4108         tcg_val = cpu_reg(s, rm);
4109     } else {
4110         uint64_t mask;
4111         switch (sz) {
4112         case 0:
4113             mask = 0xFF;
4114             break;
4115         case 1:
4116             mask = 0xFFFF;
4117             break;
4118         case 2:
4119             mask = 0xFFFFFFFF;
4120             break;
4121         default:
4122             g_assert_not_reached();
4123         }
4124         tcg_val = new_tmp_a64(s);
4125         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4126     }
4127
4128     tcg_acc = cpu_reg(s, rn);
4129     tcg_bytes = tcg_const_i32(1 << sz);
4130
4131     if (crc32c) {
4132         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4133     } else {
4134         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4135     }
4136
4137     tcg_temp_free_i32(tcg_bytes);
4138 }
4139
4140 /* C3.5.8 Data-processing (2 source)
4141  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4142  * +----+---+---+-----------------+------+--------+------+------+
4143  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4144  * +----+---+---+-----------------+------+--------+------+------+
4145  */
4146 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4147 {
4148     unsigned int sf, rm, opcode, rn, rd;
4149     sf = extract32(insn, 31, 1);
4150     rm = extract32(insn, 16, 5);
4151     opcode = extract32(insn, 10, 6);
4152     rn = extract32(insn, 5, 5);
4153     rd = extract32(insn, 0, 5);
4154
4155     if (extract32(insn, 29, 1)) {
4156         unallocated_encoding(s);
4157         return;
4158     }
4159
4160     switch (opcode) {
4161     case 2: /* UDIV */
4162         handle_div(s, false, sf, rm, rn, rd);
4163         break;
4164     case 3: /* SDIV */
4165         handle_div(s, true, sf, rm, rn, rd);
4166         break;
4167     case 8: /* LSLV */
4168         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4169         break;
4170     case 9: /* LSRV */
4171         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4172         break;
4173     case 10: /* ASRV */
4174         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4175         break;
4176     case 11: /* RORV */
4177         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4178         break;
4179     case 16:
4180     case 17:
4181     case 18:
4182     case 19:
4183     case 20:
4184     case 21:
4185     case 22:
4186     case 23: /* CRC32 */
4187     {
4188         int sz = extract32(opcode, 0, 2);
4189         bool crc32c = extract32(opcode, 2, 1);
4190         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4191         break;
4192     }
4193     default:
4194         unallocated_encoding(s);
4195         break;
4196     }
4197 }
4198
4199 /* C3.5 Data processing - register */
4200 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4201 {
4202     switch (extract32(insn, 24, 5)) {
4203     case 0x0a: /* Logical (shifted register) */
4204         disas_logic_reg(s, insn);
4205         break;
4206     case 0x0b: /* Add/subtract */
4207         if (insn & (1 << 21)) { /* (extended register) */
4208             disas_add_sub_ext_reg(s, insn);
4209         } else {
4210             disas_add_sub_reg(s, insn);
4211         }
4212         break;
4213     case 0x1b: /* Data-processing (3 source) */
4214         disas_data_proc_3src(s, insn);
4215         break;
4216     case 0x1a:
4217         switch (extract32(insn, 21, 3)) {
4218         case 0x0: /* Add/subtract (with carry) */
4219             disas_adc_sbc(s, insn);
4220             break;
4221         case 0x2: /* Conditional compare */
4222             disas_cc(s, insn); /* both imm and reg forms */
4223             break;
4224         case 0x4: /* Conditional select */
4225             disas_cond_select(s, insn);
4226             break;
4227         case 0x6: /* Data-processing */
4228             if (insn & (1 << 30)) { /* (1 source) */
4229                 disas_data_proc_1src(s, insn);
4230             } else {            /* (2 source) */
4231                 disas_data_proc_2src(s, insn);
4232             }
4233             break;
4234         default:
4235             unallocated_encoding(s);
4236             break;
4237         }
4238         break;
4239     default:
4240         unallocated_encoding(s);
4241         break;
4242     }
4243 }
4244
4245 static void handle_fp_compare(DisasContext *s, bool is_double,
4246                               unsigned int rn, unsigned int rm,
4247                               bool cmp_with_zero, bool signal_all_nans)
4248 {
4249     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4250     TCGv_ptr fpst = get_fpstatus_ptr();
4251
4252     if (is_double) {
4253         TCGv_i64 tcg_vn, tcg_vm;
4254
4255         tcg_vn = read_fp_dreg(s, rn);
4256         if (cmp_with_zero) {
4257             tcg_vm = tcg_const_i64(0);
4258         } else {
4259             tcg_vm = read_fp_dreg(s, rm);
4260         }
4261         if (signal_all_nans) {
4262             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4263         } else {
4264             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4265         }
4266         tcg_temp_free_i64(tcg_vn);
4267         tcg_temp_free_i64(tcg_vm);
4268     } else {
4269         TCGv_i32 tcg_vn, tcg_vm;
4270
4271         tcg_vn = read_fp_sreg(s, rn);
4272         if (cmp_with_zero) {
4273             tcg_vm = tcg_const_i32(0);
4274         } else {
4275             tcg_vm = read_fp_sreg(s, rm);
4276         }
4277         if (signal_all_nans) {
4278             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4279         } else {
4280             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4281         }
4282         tcg_temp_free_i32(tcg_vn);
4283         tcg_temp_free_i32(tcg_vm);
4284     }
4285
4286     tcg_temp_free_ptr(fpst);
4287
4288     gen_set_nzcv(tcg_flags);
4289
4290     tcg_temp_free_i64(tcg_flags);
4291 }
4292
4293 /* C3.6.22 Floating point compare
4294  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4295  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4296  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4297  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4298  */
4299 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4300 {
4301     unsigned int mos, type, rm, op, rn, opc, op2r;
4302
4303     mos = extract32(insn, 29, 3);
4304     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4305     rm = extract32(insn, 16, 5);
4306     op = extract32(insn, 14, 2);
4307     rn = extract32(insn, 5, 5);
4308     opc = extract32(insn, 3, 2);
4309     op2r = extract32(insn, 0, 3);
4310
4311     if (mos || op || op2r || type > 1) {
4312         unallocated_encoding(s);
4313         return;
4314     }
4315
4316     if (!fp_access_check(s)) {
4317         return;
4318     }
4319
4320     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4321 }
4322
4323 /* C3.6.23 Floating point conditional compare
4324  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4325  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4326  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4327  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4328  */
4329 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4330 {
4331     unsigned int mos, type, rm, cond, rn, op, nzcv;
4332     TCGv_i64 tcg_flags;
4333     TCGLabel *label_continue = NULL;
4334
4335     mos = extract32(insn, 29, 3);
4336     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4337     rm = extract32(insn, 16, 5);
4338     cond = extract32(insn, 12, 4);
4339     rn = extract32(insn, 5, 5);
4340     op = extract32(insn, 4, 1);
4341     nzcv = extract32(insn, 0, 4);
4342
4343     if (mos || type > 1) {
4344         unallocated_encoding(s);
4345         return;
4346     }
4347
4348     if (!fp_access_check(s)) {
4349         return;
4350     }
4351
4352     if (cond < 0x0e) { /* not always */
4353         TCGLabel *label_match = gen_new_label();
4354         label_continue = gen_new_label();
4355         arm_gen_test_cc(cond, label_match);
4356         /* nomatch: */
4357         tcg_flags = tcg_const_i64(nzcv << 28);
4358         gen_set_nzcv(tcg_flags);
4359         tcg_temp_free_i64(tcg_flags);
4360         tcg_gen_br(label_continue);
4361         gen_set_label(label_match);
4362     }
4363
4364     handle_fp_compare(s, type, rn, rm, false, op);
4365
4366     if (cond < 0x0e) {
4367         gen_set_label(label_continue);
4368     }
4369 }
4370
4371 /* C3.6.24 Floating point conditional select
4372  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4373  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4374  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4375  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4376  */
4377 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4378 {
4379     unsigned int mos, type, rm, cond, rn, rd;
4380     TCGv_i64 t_true, t_false, t_zero;
4381     DisasCompare64 c;
4382
4383     mos = extract32(insn, 29, 3);
4384     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4385     rm = extract32(insn, 16, 5);
4386     cond = extract32(insn, 12, 4);
4387     rn = extract32(insn, 5, 5);
4388     rd = extract32(insn, 0, 5);
4389
4390     if (mos || type > 1) {
4391         unallocated_encoding(s);
4392         return;
4393     }
4394
4395     if (!fp_access_check(s)) {
4396         return;
4397     }
4398
4399     /* Zero extend sreg inputs to 64 bits now.  */
4400     t_true = tcg_temp_new_i64();
4401     t_false = tcg_temp_new_i64();
4402     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4403     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4404
4405     a64_test_cc(&c, cond);
4406     t_zero = tcg_const_i64(0);
4407     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4408     tcg_temp_free_i64(t_zero);
4409     tcg_temp_free_i64(t_false);
4410     a64_free_cc(&c);
4411
4412     /* Note that sregs write back zeros to the high bits,
4413        and we've already done the zero-extension.  */
4414     write_fp_dreg(s, rd, t_true);
4415     tcg_temp_free_i64(t_true);
4416 }
4417
4418 /* C3.6.25 Floating-point data-processing (1 source) - single precision */
4419 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4420 {
4421     TCGv_ptr fpst;
4422     TCGv_i32 tcg_op;
4423     TCGv_i32 tcg_res;
4424
4425     fpst = get_fpstatus_ptr();
4426     tcg_op = read_fp_sreg(s, rn);
4427     tcg_res = tcg_temp_new_i32();
4428
4429     switch (opcode) {
4430     case 0x0: /* FMOV */
4431         tcg_gen_mov_i32(tcg_res, tcg_op);
4432         break;
4433     case 0x1: /* FABS */
4434         gen_helper_vfp_abss(tcg_res, tcg_op);
4435         break;
4436     case 0x2: /* FNEG */
4437         gen_helper_vfp_negs(tcg_res, tcg_op);
4438         break;
4439     case 0x3: /* FSQRT */
4440         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4441         break;
4442     case 0x8: /* FRINTN */
4443     case 0x9: /* FRINTP */
4444     case 0xa: /* FRINTM */
4445     case 0xb: /* FRINTZ */
4446     case 0xc: /* FRINTA */
4447     {
4448         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4449
4450         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4451         gen_helper_rints(tcg_res, tcg_op, fpst);
4452
4453         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4454         tcg_temp_free_i32(tcg_rmode);
4455         break;
4456     }
4457     case 0xe: /* FRINTX */
4458         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4459         break;
4460     case 0xf: /* FRINTI */
4461         gen_helper_rints(tcg_res, tcg_op, fpst);
4462         break;
4463     default:
4464         abort();
4465     }
4466
4467     write_fp_sreg(s, rd, tcg_res);
4468
4469     tcg_temp_free_ptr(fpst);
4470     tcg_temp_free_i32(tcg_op);
4471     tcg_temp_free_i32(tcg_res);
4472 }
4473
4474 /* C3.6.25 Floating-point data-processing (1 source) - double precision */
4475 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4476 {
4477     TCGv_ptr fpst;
4478     TCGv_i64 tcg_op;
4479     TCGv_i64 tcg_res;
4480
4481     fpst = get_fpstatus_ptr();
4482     tcg_op = read_fp_dreg(s, rn);
4483     tcg_res = tcg_temp_new_i64();
4484
4485     switch (opcode) {
4486     case 0x0: /* FMOV */
4487         tcg_gen_mov_i64(tcg_res, tcg_op);
4488         break;
4489     case 0x1: /* FABS */
4490         gen_helper_vfp_absd(tcg_res, tcg_op);
4491         break;
4492     case 0x2: /* FNEG */
4493         gen_helper_vfp_negd(tcg_res, tcg_op);
4494         break;
4495     case 0x3: /* FSQRT */
4496         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4497         break;
4498     case 0x8: /* FRINTN */
4499     case 0x9: /* FRINTP */
4500     case 0xa: /* FRINTM */
4501     case 0xb: /* FRINTZ */
4502     case 0xc: /* FRINTA */
4503     {
4504         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4505
4506         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4507         gen_helper_rintd(tcg_res, tcg_op, fpst);
4508
4509         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
4510         tcg_temp_free_i32(tcg_rmode);
4511         break;
4512     }
4513     case 0xe: /* FRINTX */
4514         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4515         break;
4516     case 0xf: /* FRINTI */
4517         gen_helper_rintd(tcg_res, tcg_op, fpst);
4518         break;
4519     default:
4520         abort();
4521     }
4522
4523     write_fp_dreg(s, rd, tcg_res);
4524
4525     tcg_temp_free_ptr(fpst);
4526     tcg_temp_free_i64(tcg_op);
4527     tcg_temp_free_i64(tcg_res);
4528 }
4529
4530 static void handle_fp_fcvt(DisasContext *s, int opcode,
4531                            int rd, int rn, int dtype, int ntype)
4532 {
4533     switch (ntype) {
4534     case 0x0:
4535     {
4536         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4537         if (dtype == 1) {
4538             /* Single to double */
4539             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4540             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4541             write_fp_dreg(s, rd, tcg_rd);
4542             tcg_temp_free_i64(tcg_rd);
4543         } else {
4544             /* Single to half */
4545             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4546             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4547             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4548             write_fp_sreg(s, rd, tcg_rd);
4549             tcg_temp_free_i32(tcg_rd);
4550         }
4551         tcg_temp_free_i32(tcg_rn);
4552         break;
4553     }
4554     case 0x1:
4555     {
4556         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4557         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4558         if (dtype == 0) {
4559             /* Double to single */
4560             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4561         } else {
4562             /* Double to half */
4563             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4564             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4565         }
4566         write_fp_sreg(s, rd, tcg_rd);
4567         tcg_temp_free_i32(tcg_rd);
4568         tcg_temp_free_i64(tcg_rn);
4569         break;
4570     }
4571     case 0x3:
4572     {
4573         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4574         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4575         if (dtype == 0) {
4576             /* Half to single */
4577             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4578             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4579             write_fp_sreg(s, rd, tcg_rd);
4580             tcg_temp_free_i32(tcg_rd);
4581         } else {
4582             /* Half to double */
4583             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4584             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4585             write_fp_dreg(s, rd, tcg_rd);
4586             tcg_temp_free_i64(tcg_rd);
4587         }
4588         tcg_temp_free_i32(tcg_rn);
4589         break;
4590     }
4591     default:
4592         abort();
4593     }
4594 }
4595
4596 /* C3.6.25 Floating point data-processing (1 source)
4597  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4598  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4599  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4600  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4601  */
4602 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4603 {
4604     int type = extract32(insn, 22, 2);
4605     int opcode = extract32(insn, 15, 6);
4606     int rn = extract32(insn, 5, 5);
4607     int rd = extract32(insn, 0, 5);
4608
4609     switch (opcode) {
4610     case 0x4: case 0x5: case 0x7:
4611     {
4612         /* FCVT between half, single and double precision */
4613         int dtype = extract32(opcode, 0, 2);
4614         if (type == 2 || dtype == type) {
4615             unallocated_encoding(s);
4616             return;
4617         }
4618         if (!fp_access_check(s)) {
4619             return;
4620         }
4621
4622         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4623         break;
4624     }
4625     case 0x0 ... 0x3:
4626     case 0x8 ... 0xc:
4627     case 0xe ... 0xf:
4628         /* 32-to-32 and 64-to-64 ops */
4629         switch (type) {
4630         case 0:
4631             if (!fp_access_check(s)) {
4632                 return;
4633             }
4634
4635             handle_fp_1src_single(s, opcode, rd, rn);
4636             break;
4637         case 1:
4638             if (!fp_access_check(s)) {
4639                 return;
4640             }
4641
4642             handle_fp_1src_double(s, opcode, rd, rn);
4643             break;
4644         default:
4645             unallocated_encoding(s);
4646         }
4647         break;
4648     default:
4649         unallocated_encoding(s);
4650         break;
4651     }
4652 }
4653
4654 /* C3.6.26 Floating-point data-processing (2 source) - single precision */
4655 static void handle_fp_2src_single(DisasContext *s, int opcode,
4656                                   int rd, int rn, int rm)
4657 {
4658     TCGv_i32 tcg_op1;
4659     TCGv_i32 tcg_op2;
4660     TCGv_i32 tcg_res;
4661     TCGv_ptr fpst;
4662
4663     tcg_res = tcg_temp_new_i32();
4664     fpst = get_fpstatus_ptr();
4665     tcg_op1 = read_fp_sreg(s, rn);
4666     tcg_op2 = read_fp_sreg(s, rm);
4667
4668     switch (opcode) {
4669     case 0x0: /* FMUL */
4670         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4671         break;
4672     case 0x1: /* FDIV */
4673         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4674         break;
4675     case 0x2: /* FADD */
4676         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4677         break;
4678     case 0x3: /* FSUB */
4679         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4680         break;
4681     case 0x4: /* FMAX */
4682         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4683         break;
4684     case 0x5: /* FMIN */
4685         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4686         break;
4687     case 0x6: /* FMAXNM */
4688         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4689         break;
4690     case 0x7: /* FMINNM */
4691         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4692         break;
4693     case 0x8: /* FNMUL */
4694         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4695         gen_helper_vfp_negs(tcg_res, tcg_res);
4696         break;
4697     }
4698
4699     write_fp_sreg(s, rd, tcg_res);
4700
4701     tcg_temp_free_ptr(fpst);
4702     tcg_temp_free_i32(tcg_op1);
4703     tcg_temp_free_i32(tcg_op2);
4704     tcg_temp_free_i32(tcg_res);
4705 }
4706
4707 /* C3.6.26 Floating-point data-processing (2 source) - double precision */
4708 static void handle_fp_2src_double(DisasContext *s, int opcode,
4709                                   int rd, int rn, int rm)
4710 {
4711     TCGv_i64 tcg_op1;
4712     TCGv_i64 tcg_op2;
4713     TCGv_i64 tcg_res;
4714     TCGv_ptr fpst;
4715
4716     tcg_res = tcg_temp_new_i64();
4717     fpst = get_fpstatus_ptr();
4718     tcg_op1 = read_fp_dreg(s, rn);
4719     tcg_op2 = read_fp_dreg(s, rm);
4720
4721     switch (opcode) {
4722     case 0x0: /* FMUL */
4723         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4724         break;
4725     case 0x1: /* FDIV */
4726         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
4727         break;
4728     case 0x2: /* FADD */
4729         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
4730         break;
4731     case 0x3: /* FSUB */
4732         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
4733         break;
4734     case 0x4: /* FMAX */
4735         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
4736         break;
4737     case 0x5: /* FMIN */
4738         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
4739         break;
4740     case 0x6: /* FMAXNM */
4741         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4742         break;
4743     case 0x7: /* FMINNM */
4744         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
4745         break;
4746     case 0x8: /* FNMUL */
4747         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
4748         gen_helper_vfp_negd(tcg_res, tcg_res);
4749         break;
4750     }
4751
4752     write_fp_dreg(s, rd, tcg_res);
4753
4754     tcg_temp_free_ptr(fpst);
4755     tcg_temp_free_i64(tcg_op1);
4756     tcg_temp_free_i64(tcg_op2);
4757     tcg_temp_free_i64(tcg_res);
4758 }
4759
4760 /* C3.6.26 Floating point data-processing (2 source)
4761  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
4762  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4763  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
4764  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
4765  */
4766 static void disas_fp_2src(DisasContext *s, uint32_t insn)
4767 {
4768     int type = extract32(insn, 22, 2);
4769     int rd = extract32(insn, 0, 5);
4770     int rn = extract32(insn, 5, 5);
4771     int rm = extract32(insn, 16, 5);
4772     int opcode = extract32(insn, 12, 4);
4773
4774     if (opcode > 8) {
4775         unallocated_encoding(s);
4776         return;
4777     }
4778
4779     switch (type) {
4780     case 0:
4781         if (!fp_access_check(s)) {
4782             return;
4783         }
4784         handle_fp_2src_single(s, opcode, rd, rn, rm);
4785         break;
4786     case 1:
4787         if (!fp_access_check(s)) {
4788             return;
4789         }
4790         handle_fp_2src_double(s, opcode, rd, rn, rm);
4791         break;
4792     default:
4793         unallocated_encoding(s);
4794     }
4795 }
4796
4797 /* C3.6.27 Floating-point data-processing (3 source) - single precision */
4798 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
4799                                   int rd, int rn, int rm, int ra)
4800 {
4801     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
4802     TCGv_i32 tcg_res = tcg_temp_new_i32();
4803     TCGv_ptr fpst = get_fpstatus_ptr();
4804
4805     tcg_op1 = read_fp_sreg(s, rn);
4806     tcg_op2 = read_fp_sreg(s, rm);
4807     tcg_op3 = read_fp_sreg(s, ra);
4808
4809     /* These are fused multiply-add, and must be done as one
4810      * floating point operation with no rounding between the
4811      * multiplication and addition steps.
4812      * NB that doing the negations here as separate steps is
4813      * correct : an input NaN should come out with its sign bit
4814      * flipped if it is a negated-input.
4815      */
4816     if (o1 == true) {
4817         gen_helper_vfp_negs(tcg_op3, tcg_op3);
4818     }
4819
4820     if (o0 != o1) {
4821         gen_helper_vfp_negs(tcg_op1, tcg_op1);
4822     }
4823
4824     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4825
4826     write_fp_sreg(s, rd, tcg_res);
4827
4828     tcg_temp_free_ptr(fpst);
4829     tcg_temp_free_i32(tcg_op1);
4830     tcg_temp_free_i32(tcg_op2);
4831     tcg_temp_free_i32(tcg_op3);
4832     tcg_temp_free_i32(tcg_res);
4833 }
4834
4835 /* C3.6.27 Floating-point data-processing (3 source) - double precision */
4836 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
4837                                   int rd, int rn, int rm, int ra)
4838 {
4839     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
4840     TCGv_i64 tcg_res = tcg_temp_new_i64();
4841     TCGv_ptr fpst = get_fpstatus_ptr();
4842
4843     tcg_op1 = read_fp_dreg(s, rn);
4844     tcg_op2 = read_fp_dreg(s, rm);
4845     tcg_op3 = read_fp_dreg(s, ra);
4846
4847     /* These are fused multiply-add, and must be done as one
4848      * floating point operation with no rounding between the
4849      * multiplication and addition steps.
4850      * NB that doing the negations here as separate steps is
4851      * correct : an input NaN should come out with its sign bit
4852      * flipped if it is a negated-input.
4853      */
4854     if (o1 == true) {
4855         gen_helper_vfp_negd(tcg_op3, tcg_op3);
4856     }
4857
4858     if (o0 != o1) {
4859         gen_helper_vfp_negd(tcg_op1, tcg_op1);
4860     }
4861
4862     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
4863
4864     write_fp_dreg(s, rd, tcg_res);
4865
4866     tcg_temp_free_ptr(fpst);
4867     tcg_temp_free_i64(tcg_op1);
4868     tcg_temp_free_i64(tcg_op2);
4869     tcg_temp_free_i64(tcg_op3);
4870     tcg_temp_free_i64(tcg_res);
4871 }
4872
4873 /* C3.6.27 Floating point data-processing (3 source)
4874  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
4875  * +---+---+---+-----------+------+----+------+----+------+------+------+
4876  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4877  * +---+---+---+-----------+------+----+------+----+------+------+------+
4878  */
4879 static void disas_fp_3src(DisasContext *s, uint32_t insn)
4880 {
4881     int type = extract32(insn, 22, 2);
4882     int rd = extract32(insn, 0, 5);
4883     int rn = extract32(insn, 5, 5);
4884     int ra = extract32(insn, 10, 5);
4885     int rm = extract32(insn, 16, 5);
4886     bool o0 = extract32(insn, 15, 1);
4887     bool o1 = extract32(insn, 21, 1);
4888
4889     switch (type) {
4890     case 0:
4891         if (!fp_access_check(s)) {
4892             return;
4893         }
4894         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
4895         break;
4896     case 1:
4897         if (!fp_access_check(s)) {
4898             return;
4899         }
4900         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
4901         break;
4902     default:
4903         unallocated_encoding(s);
4904     }
4905 }
4906
4907 /* C3.6.28 Floating point immediate
4908  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
4909  * +---+---+---+-----------+------+---+------------+-------+------+------+
4910  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
4911  * +---+---+---+-----------+------+---+------------+-------+------+------+
4912  */
4913 static void disas_fp_imm(DisasContext *s, uint32_t insn)
4914 {
4915     int rd = extract32(insn, 0, 5);
4916     int imm8 = extract32(insn, 13, 8);
4917     int is_double = extract32(insn, 22, 2);
4918     uint64_t imm;
4919     TCGv_i64 tcg_res;
4920
4921     if (is_double > 1) {
4922         unallocated_encoding(s);
4923         return;
4924     }
4925
4926     if (!fp_access_check(s)) {
4927         return;
4928     }
4929
4930     /* The imm8 encodes the sign bit, enough bits to represent
4931      * an exponent in the range 01....1xx to 10....0xx,
4932      * and the most significant 4 bits of the mantissa; see
4933      * VFPExpandImm() in the v8 ARM ARM.
4934      */
4935     if (is_double) {
4936         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4937             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
4938             extract32(imm8, 0, 6);
4939         imm <<= 48;
4940     } else {
4941         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
4942             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
4943             (extract32(imm8, 0, 6) << 3);
4944         imm <<= 16;
4945     }
4946
4947     tcg_res = tcg_const_i64(imm);
4948     write_fp_dreg(s, rd, tcg_res);
4949     tcg_temp_free_i64(tcg_res);
4950 }
4951
4952 /* Handle floating point <=> fixed point conversions. Note that we can
4953  * also deal with fp <=> integer conversions as a special case (scale == 64)
4954  * OPTME: consider handling that special case specially or at least skipping
4955  * the call to scalbn in the helpers for zero shifts.
4956  */
4957 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
4958                            bool itof, int rmode, int scale, int sf, int type)
4959 {
4960     bool is_signed = !(opcode & 1);
4961     bool is_double = type;
4962     TCGv_ptr tcg_fpstatus;
4963     TCGv_i32 tcg_shift;
4964
4965     tcg_fpstatus = get_fpstatus_ptr();
4966
4967     tcg_shift = tcg_const_i32(64 - scale);
4968
4969     if (itof) {
4970         TCGv_i64 tcg_int = cpu_reg(s, rn);
4971         if (!sf) {
4972             TCGv_i64 tcg_extend = new_tmp_a64(s);
4973
4974             if (is_signed) {
4975                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
4976             } else {
4977                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
4978             }
4979
4980             tcg_int = tcg_extend;
4981         }
4982
4983         if (is_double) {
4984             TCGv_i64 tcg_double = tcg_temp_new_i64();
4985             if (is_signed) {
4986                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
4987                                      tcg_shift, tcg_fpstatus);
4988             } else {
4989                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
4990                                      tcg_shift, tcg_fpstatus);
4991             }
4992             write_fp_dreg(s, rd, tcg_double);
4993             tcg_temp_free_i64(tcg_double);
4994         } else {
4995             TCGv_i32 tcg_single = tcg_temp_new_i32();
4996             if (is_signed) {
4997                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
4998                                      tcg_shift, tcg_fpstatus);
4999             } else {
5000                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
5001                                      tcg_shift, tcg_fpstatus);
5002             }
5003             write_fp_sreg(s, rd, tcg_single);
5004             tcg_temp_free_i32(tcg_single);
5005         }
5006     } else {
5007         TCGv_i64 tcg_int = cpu_reg(s, rd);
5008         TCGv_i32 tcg_rmode;
5009
5010         if (extract32(opcode, 2, 1)) {
5011             /* There are too many rounding modes to all fit into rmode,
5012              * so FCVTA[US] is a special case.
5013              */
5014             rmode = FPROUNDING_TIEAWAY;
5015         }
5016
5017         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5018
5019         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5020
5021         if (is_double) {
5022             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
5023             if (is_signed) {
5024                 if (!sf) {
5025                     gen_helper_vfp_tosld(tcg_int, tcg_double,
5026                                          tcg_shift, tcg_fpstatus);
5027                 } else {
5028                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
5029                                          tcg_shift, tcg_fpstatus);
5030                 }
5031             } else {
5032                 if (!sf) {
5033                     gen_helper_vfp_tould(tcg_int, tcg_double,
5034                                          tcg_shift, tcg_fpstatus);
5035                 } else {
5036                     gen_helper_vfp_touqd(tcg_int, tcg_double,
5037                                          tcg_shift, tcg_fpstatus);
5038                 }
5039             }
5040             tcg_temp_free_i64(tcg_double);
5041         } else {
5042             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
5043             if (sf) {
5044                 if (is_signed) {
5045                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
5046                                          tcg_shift, tcg_fpstatus);
5047                 } else {
5048                     gen_helper_vfp_touqs(tcg_int, tcg_single,
5049                                          tcg_shift, tcg_fpstatus);
5050                 }
5051             } else {
5052                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
5053                 if (is_signed) {
5054                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
5055                                          tcg_shift, tcg_fpstatus);
5056                 } else {
5057                     gen_helper_vfp_touls(tcg_dest, tcg_single,
5058                                          tcg_shift, tcg_fpstatus);
5059                 }
5060                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5061                 tcg_temp_free_i32(tcg_dest);
5062             }
5063             tcg_temp_free_i32(tcg_single);
5064         }
5065
5066         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
5067         tcg_temp_free_i32(tcg_rmode);
5068
5069         if (!sf) {
5070             tcg_gen_ext32u_i64(tcg_int, tcg_int);
5071         }
5072     }
5073
5074     tcg_temp_free_ptr(tcg_fpstatus);
5075     tcg_temp_free_i32(tcg_shift);
5076 }
5077
5078 /* C3.6.29 Floating point <-> fixed point conversions
5079  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5080  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5081  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5082  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5083  */
5084 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5085 {
5086     int rd = extract32(insn, 0, 5);
5087     int rn = extract32(insn, 5, 5);
5088     int scale = extract32(insn, 10, 6);
5089     int opcode = extract32(insn, 16, 3);
5090     int rmode = extract32(insn, 19, 2);
5091     int type = extract32(insn, 22, 2);
5092     bool sbit = extract32(insn, 29, 1);
5093     bool sf = extract32(insn, 31, 1);
5094     bool itof;
5095
5096     if (sbit || (type > 1)
5097         || (!sf && scale < 32)) {
5098         unallocated_encoding(s);
5099         return;
5100     }
5101
5102     switch ((rmode << 3) | opcode) {
5103     case 0x2: /* SCVTF */
5104     case 0x3: /* UCVTF */
5105         itof = true;
5106         break;
5107     case 0x18: /* FCVTZS */
5108     case 0x19: /* FCVTZU */
5109         itof = false;
5110         break;
5111     default:
5112         unallocated_encoding(s);
5113         return;
5114     }
5115
5116     if (!fp_access_check(s)) {
5117         return;
5118     }
5119
5120     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5121 }
5122
5123 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5124 {
5125     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5126      * without conversion.
5127      */
5128
5129     if (itof) {
5130         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5131
5132         switch (type) {
5133         case 0:
5134         {
5135             /* 32 bit */
5136             TCGv_i64 tmp = tcg_temp_new_i64();
5137             tcg_gen_ext32u_i64(tmp, tcg_rn);
5138             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5139             tcg_gen_movi_i64(tmp, 0);
5140             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5141             tcg_temp_free_i64(tmp);
5142             break;
5143         }
5144         case 1:
5145         {
5146             /* 64 bit */
5147             TCGv_i64 tmp = tcg_const_i64(0);
5148             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5149             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5150             tcg_temp_free_i64(tmp);
5151             break;
5152         }
5153         case 2:
5154             /* 64 bit to top half. */
5155             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5156             break;
5157         }
5158     } else {
5159         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5160
5161         switch (type) {
5162         case 0:
5163             /* 32 bit */
5164             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5165             break;
5166         case 1:
5167             /* 64 bit */
5168             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5169             break;
5170         case 2:
5171             /* 64 bits from top half */
5172             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5173             break;
5174         }
5175     }
5176 }
5177
5178 /* C3.6.30 Floating point <-> integer conversions
5179  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5180  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5181  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5182  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5183  */
5184 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5185 {
5186     int rd = extract32(insn, 0, 5);
5187     int rn = extract32(insn, 5, 5);
5188     int opcode = extract32(insn, 16, 3);
5189     int rmode = extract32(insn, 19, 2);
5190     int type = extract32(insn, 22, 2);
5191     bool sbit = extract32(insn, 29, 1);
5192     bool sf = extract32(insn, 31, 1);
5193
5194     if (sbit) {
5195         unallocated_encoding(s);
5196         return;
5197     }
5198
5199     if (opcode > 5) {
5200         /* FMOV */
5201         bool itof = opcode & 1;
5202
5203         if (rmode >= 2) {
5204             unallocated_encoding(s);
5205             return;
5206         }
5207
5208         switch (sf << 3 | type << 1 | rmode) {
5209         case 0x0: /* 32 bit */
5210         case 0xa: /* 64 bit */
5211         case 0xd: /* 64 bit to top half of quad */
5212             break;
5213         default:
5214             /* all other sf/type/rmode combinations are invalid */
5215             unallocated_encoding(s);
5216             break;
5217         }
5218
5219         if (!fp_access_check(s)) {
5220             return;
5221         }
5222         handle_fmov(s, rd, rn, type, itof);
5223     } else {
5224         /* actual FP conversions */
5225         bool itof = extract32(opcode, 1, 1);
5226
5227         if (type > 1 || (rmode != 0 && opcode > 1)) {
5228             unallocated_encoding(s);
5229             return;
5230         }
5231
5232         if (!fp_access_check(s)) {
5233             return;
5234         }
5235         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5236     }
5237 }
5238
5239 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5240  *   31  30  29 28     25 24                          0
5241  * +---+---+---+---------+-----------------------------+
5242  * |   | 0 |   | 1 1 1 1 |                             |
5243  * +---+---+---+---------+-----------------------------+
5244  */
5245 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5246 {
5247     if (extract32(insn, 24, 1)) {
5248         /* Floating point data-processing (3 source) */
5249         disas_fp_3src(s, insn);
5250     } else if (extract32(insn, 21, 1) == 0) {
5251         /* Floating point to fixed point conversions */
5252         disas_fp_fixed_conv(s, insn);
5253     } else {
5254         switch (extract32(insn, 10, 2)) {
5255         case 1:
5256             /* Floating point conditional compare */
5257             disas_fp_ccomp(s, insn);
5258             break;
5259         case 2:
5260             /* Floating point data-processing (2 source) */
5261             disas_fp_2src(s, insn);
5262             break;
5263         case 3:
5264             /* Floating point conditional select */
5265             disas_fp_csel(s, insn);
5266             break;
5267         case 0:
5268             switch (ctz32(extract32(insn, 12, 4))) {
5269             case 0: /* [15:12] == xxx1 */
5270                 /* Floating point immediate */
5271                 disas_fp_imm(s, insn);
5272                 break;
5273             case 1: /* [15:12] == xx10 */
5274                 /* Floating point compare */
5275                 disas_fp_compare(s, insn);
5276                 break;
5277             case 2: /* [15:12] == x100 */
5278                 /* Floating point data-processing (1 source) */
5279                 disas_fp_1src(s, insn);
5280                 break;
5281             case 3: /* [15:12] == 1000 */
5282                 unallocated_encoding(s);
5283                 break;
5284             default: /* [15:12] == 0000 */
5285                 /* Floating point <-> integer conversions */
5286                 disas_fp_int_conv(s, insn);
5287                 break;
5288             }
5289             break;
5290         }
5291     }
5292 }
5293
5294 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5295                      int pos)
5296 {
5297     /* Extract 64 bits from the middle of two concatenated 64 bit
5298      * vector register slices left:right. The extracted bits start
5299      * at 'pos' bits into the right (least significant) side.
5300      * We return the result in tcg_right, and guarantee not to
5301      * trash tcg_left.
5302      */
5303     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5304     assert(pos > 0 && pos < 64);
5305
5306     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5307     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5308     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5309
5310     tcg_temp_free_i64(tcg_tmp);
5311 }
5312
5313 /* C3.6.1 EXT
5314  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5315  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5316  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5317  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5318  */
5319 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5320 {
5321     int is_q = extract32(insn, 30, 1);
5322     int op2 = extract32(insn, 22, 2);
5323     int imm4 = extract32(insn, 11, 4);
5324     int rm = extract32(insn, 16, 5);
5325     int rn = extract32(insn, 5, 5);
5326     int rd = extract32(insn, 0, 5);
5327     int pos = imm4 << 3;
5328     TCGv_i64 tcg_resl, tcg_resh;
5329
5330     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5331         unallocated_encoding(s);
5332         return;
5333     }
5334
5335     if (!fp_access_check(s)) {
5336         return;
5337     }
5338
5339     tcg_resh = tcg_temp_new_i64();
5340     tcg_resl = tcg_temp_new_i64();
5341
5342     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5343      * either extracting 128 bits from a 128:128 concatenation, or
5344      * extracting 64 bits from a 64:64 concatenation.
5345      */
5346     if (!is_q) {
5347         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5348         if (pos != 0) {
5349             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5350             do_ext64(s, tcg_resh, tcg_resl, pos);
5351         }
5352         tcg_gen_movi_i64(tcg_resh, 0);
5353     } else {
5354         TCGv_i64 tcg_hh;
5355         typedef struct {
5356             int reg;
5357             int elt;
5358         } EltPosns;
5359         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5360         EltPosns *elt = eltposns;
5361
5362         if (pos >= 64) {
5363             elt++;
5364             pos -= 64;
5365         }
5366
5367         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5368         elt++;
5369         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5370         elt++;
5371         if (pos != 0) {
5372             do_ext64(s, tcg_resh, tcg_resl, pos);
5373             tcg_hh = tcg_temp_new_i64();
5374             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5375             do_ext64(s, tcg_hh, tcg_resh, pos);
5376             tcg_temp_free_i64(tcg_hh);
5377         }
5378     }
5379
5380     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5381     tcg_temp_free_i64(tcg_resl);
5382     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5383     tcg_temp_free_i64(tcg_resh);
5384 }
5385
5386 /* C3.6.2 TBL/TBX
5387  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5388  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5389  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5390  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5391  */
5392 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5393 {
5394     int op2 = extract32(insn, 22, 2);
5395     int is_q = extract32(insn, 30, 1);
5396     int rm = extract32(insn, 16, 5);
5397     int rn = extract32(insn, 5, 5);
5398     int rd = extract32(insn, 0, 5);
5399     int is_tblx = extract32(insn, 12, 1);
5400     int len = extract32(insn, 13, 2);
5401     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5402     TCGv_i32 tcg_regno, tcg_numregs;
5403
5404     if (op2 != 0) {
5405         unallocated_encoding(s);
5406         return;
5407     }
5408
5409     if (!fp_access_check(s)) {
5410         return;
5411     }
5412
5413     /* This does a table lookup: for every byte element in the input
5414      * we index into a table formed from up to four vector registers,
5415      * and then the output is the result of the lookups. Our helper
5416      * function does the lookup operation for a single 64 bit part of
5417      * the input.
5418      */
5419     tcg_resl = tcg_temp_new_i64();
5420     tcg_resh = tcg_temp_new_i64();
5421
5422     if (is_tblx) {
5423         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5424     } else {
5425         tcg_gen_movi_i64(tcg_resl, 0);
5426     }
5427     if (is_tblx && is_q) {
5428         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5429     } else {
5430         tcg_gen_movi_i64(tcg_resh, 0);
5431     }
5432
5433     tcg_idx = tcg_temp_new_i64();
5434     tcg_regno = tcg_const_i32(rn);
5435     tcg_numregs = tcg_const_i32(len + 1);
5436     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5437     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5438                         tcg_regno, tcg_numregs);
5439     if (is_q) {
5440         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5441         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5442                             tcg_regno, tcg_numregs);
5443     }
5444     tcg_temp_free_i64(tcg_idx);
5445     tcg_temp_free_i32(tcg_regno);
5446     tcg_temp_free_i32(tcg_numregs);
5447
5448     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5449     tcg_temp_free_i64(tcg_resl);
5450     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5451     tcg_temp_free_i64(tcg_resh);
5452 }
5453
5454 /* C3.6.3 ZIP/UZP/TRN
5455  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5456  * +---+---+-------------+------+---+------+---+------------------+------+
5457  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5458  * +---+---+-------------+------+---+------+---+------------------+------+
5459  */
5460 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5461 {
5462     int rd = extract32(insn, 0, 5);
5463     int rn = extract32(insn, 5, 5);
5464     int rm = extract32(insn, 16, 5);
5465     int size = extract32(insn, 22, 2);
5466     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5467      * bit 2 indicates 1 vs 2 variant of the insn.
5468      */
5469     int opcode = extract32(insn, 12, 2);
5470     bool part = extract32(insn, 14, 1);
5471     bool is_q = extract32(insn, 30, 1);
5472     int esize = 8 << size;
5473     int i, ofs;
5474     int datasize = is_q ? 128 : 64;
5475     int elements = datasize / esize;
5476     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5477
5478     if (opcode == 0 || (size == 3 && !is_q)) {
5479         unallocated_encoding(s);
5480         return;
5481     }
5482
5483     if (!fp_access_check(s)) {
5484         return;
5485     }
5486
5487     tcg_resl = tcg_const_i64(0);
5488     tcg_resh = tcg_const_i64(0);
5489     tcg_res = tcg_temp_new_i64();
5490
5491     for (i = 0; i < elements; i++) {
5492         switch (opcode) {
5493         case 1: /* UZP1/2 */
5494         {
5495             int midpoint = elements / 2;
5496             if (i < midpoint) {
5497                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5498             } else {
5499                 read_vec_element(s, tcg_res, rm,
5500                                  2 * (i - midpoint) + part, size);
5501             }
5502             break;
5503         }
5504         case 2: /* TRN1/2 */
5505             if (i & 1) {
5506                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5507             } else {
5508                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5509             }
5510             break;
5511         case 3: /* ZIP1/2 */
5512         {
5513             int base = part * elements / 2;
5514             if (i & 1) {
5515                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5516             } else {
5517                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5518             }
5519             break;
5520         }
5521         default:
5522             g_assert_not_reached();
5523         }
5524
5525         ofs = i * esize;
5526         if (ofs < 64) {
5527             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5528             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5529         } else {
5530             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5531             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5532         }
5533     }
5534
5535     tcg_temp_free_i64(tcg_res);
5536
5537     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5538     tcg_temp_free_i64(tcg_resl);
5539     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5540     tcg_temp_free_i64(tcg_resh);
5541 }
5542
5543 static void do_minmaxop(DisasContext *s, TCGv_i32 tcg_elt1, TCGv_i32 tcg_elt2,
5544                         int opc, bool is_min, TCGv_ptr fpst)
5545 {
5546     /* Helper function for disas_simd_across_lanes: do a single precision
5547      * min/max operation on the specified two inputs,
5548      * and return the result in tcg_elt1.
5549      */
5550     if (opc == 0xc) {
5551         if (is_min) {
5552             gen_helper_vfp_minnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5553         } else {
5554             gen_helper_vfp_maxnums(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5555         }
5556     } else {
5557         assert(opc == 0xf);
5558         if (is_min) {
5559             gen_helper_vfp_mins(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5560         } else {
5561             gen_helper_vfp_maxs(tcg_elt1, tcg_elt1, tcg_elt2, fpst);
5562         }
5563     }
5564 }
5565
5566 /* C3.6.4 AdvSIMD across lanes
5567  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5568  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5569  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5570  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5571  */
5572 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5573 {
5574     int rd = extract32(insn, 0, 5);
5575     int rn = extract32(insn, 5, 5);
5576     int size = extract32(insn, 22, 2);
5577     int opcode = extract32(insn, 12, 5);
5578     bool is_q = extract32(insn, 30, 1);
5579     bool is_u = extract32(insn, 29, 1);
5580     bool is_fp = false;
5581     bool is_min = false;
5582     int esize;
5583     int elements;
5584     int i;
5585     TCGv_i64 tcg_res, tcg_elt;
5586
5587     switch (opcode) {
5588     case 0x1b: /* ADDV */
5589         if (is_u) {
5590             unallocated_encoding(s);
5591             return;
5592         }
5593         /* fall through */
5594     case 0x3: /* SADDLV, UADDLV */
5595     case 0xa: /* SMAXV, UMAXV */
5596     case 0x1a: /* SMINV, UMINV */
5597         if (size == 3 || (size == 2 && !is_q)) {
5598             unallocated_encoding(s);
5599             return;
5600         }
5601         break;
5602     case 0xc: /* FMAXNMV, FMINNMV */
5603     case 0xf: /* FMAXV, FMINV */
5604         if (!is_u || !is_q || extract32(size, 0, 1)) {
5605             unallocated_encoding(s);
5606             return;
5607         }
5608         /* Bit 1 of size field encodes min vs max, and actual size is always
5609          * 32 bits: adjust the size variable so following code can rely on it
5610          */
5611         is_min = extract32(size, 1, 1);
5612         is_fp = true;
5613         size = 2;
5614         break;
5615     default:
5616         unallocated_encoding(s);
5617         return;
5618     }
5619
5620     if (!fp_access_check(s)) {
5621         return;
5622     }
5623
5624     esize = 8 << size;
5625     elements = (is_q ? 128 : 64) / esize;
5626
5627     tcg_res = tcg_temp_new_i64();
5628     tcg_elt = tcg_temp_new_i64();
5629
5630     /* These instructions operate across all lanes of a vector
5631      * to produce a single result. We can guarantee that a 64
5632      * bit intermediate is sufficient:
5633      *  + for [US]ADDLV the maximum element size is 32 bits, and
5634      *    the result type is 64 bits
5635      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5636      *    same as the element size, which is 32 bits at most
5637      * For the integer operations we can choose to work at 64
5638      * or 32 bits and truncate at the end; for simplicity
5639      * we use 64 bits always. The floating point
5640      * ops do require 32 bit intermediates, though.
5641      */
5642     if (!is_fp) {
5643         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5644
5645         for (i = 1; i < elements; i++) {
5646             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
5647
5648             switch (opcode) {
5649             case 0x03: /* SADDLV / UADDLV */
5650             case 0x1b: /* ADDV */
5651                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
5652                 break;
5653             case 0x0a: /* SMAXV / UMAXV */
5654                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
5655                                     tcg_res,
5656                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5657                 break;
5658             case 0x1a: /* SMINV / UMINV */
5659                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
5660                                     tcg_res,
5661                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
5662                 break;
5663                 break;
5664             default:
5665                 g_assert_not_reached();
5666             }
5667
5668         }
5669     } else {
5670         /* Floating point ops which work on 32 bit (single) intermediates.
5671          * Note that correct NaN propagation requires that we do these
5672          * operations in exactly the order specified by the pseudocode.
5673          */
5674         TCGv_i32 tcg_elt1 = tcg_temp_new_i32();
5675         TCGv_i32 tcg_elt2 = tcg_temp_new_i32();
5676         TCGv_i32 tcg_elt3 = tcg_temp_new_i32();
5677         TCGv_ptr fpst = get_fpstatus_ptr();
5678
5679         assert(esize == 32);
5680         assert(elements == 4);
5681
5682         read_vec_element(s, tcg_elt, rn, 0, MO_32);
5683         tcg_gen_extrl_i64_i32(tcg_elt1, tcg_elt);
5684         read_vec_element(s, tcg_elt, rn, 1, MO_32);
5685         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5686
5687         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5688
5689         read_vec_element(s, tcg_elt, rn, 2, MO_32);
5690         tcg_gen_extrl_i64_i32(tcg_elt2, tcg_elt);
5691         read_vec_element(s, tcg_elt, rn, 3, MO_32);
5692         tcg_gen_extrl_i64_i32(tcg_elt3, tcg_elt);
5693
5694         do_minmaxop(s, tcg_elt2, tcg_elt3, opcode, is_min, fpst);
5695
5696         do_minmaxop(s, tcg_elt1, tcg_elt2, opcode, is_min, fpst);
5697
5698         tcg_gen_extu_i32_i64(tcg_res, tcg_elt1);
5699         tcg_temp_free_i32(tcg_elt1);
5700         tcg_temp_free_i32(tcg_elt2);
5701         tcg_temp_free_i32(tcg_elt3);
5702         tcg_temp_free_ptr(fpst);
5703     }
5704
5705     tcg_temp_free_i64(tcg_elt);
5706
5707     /* Now truncate the result to the width required for the final output */
5708     if (opcode == 0x03) {
5709         /* SADDLV, UADDLV: result is 2*esize */
5710         size++;
5711     }
5712
5713     switch (size) {
5714     case 0:
5715         tcg_gen_ext8u_i64(tcg_res, tcg_res);
5716         break;
5717     case 1:
5718         tcg_gen_ext16u_i64(tcg_res, tcg_res);
5719         break;
5720     case 2:
5721         tcg_gen_ext32u_i64(tcg_res, tcg_res);
5722         break;
5723     case 3:
5724         break;
5725     default:
5726         g_assert_not_reached();
5727     }
5728
5729     write_fp_dreg(s, rd, tcg_res);
5730     tcg_temp_free_i64(tcg_res);
5731 }
5732
5733 /* C6.3.31 DUP (Element, Vector)
5734  *
5735  *  31  30   29              21 20    16 15        10  9    5 4    0
5736  * +---+---+-------------------+--------+-------------+------+------+
5737  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5738  * +---+---+-------------------+--------+-------------+------+------+
5739  *
5740  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5741  */
5742 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
5743                              int imm5)
5744 {
5745     int size = ctz32(imm5);
5746     int esize = 8 << size;
5747     int elements = (is_q ? 128 : 64) / esize;
5748     int index, i;
5749     TCGv_i64 tmp;
5750
5751     if (size > 3 || (size == 3 && !is_q)) {
5752         unallocated_encoding(s);
5753         return;
5754     }
5755
5756     if (!fp_access_check(s)) {
5757         return;
5758     }
5759
5760     index = imm5 >> (size + 1);
5761
5762     tmp = tcg_temp_new_i64();
5763     read_vec_element(s, tmp, rn, index, size);
5764
5765     for (i = 0; i < elements; i++) {
5766         write_vec_element(s, tmp, rd, i, size);
5767     }
5768
5769     if (!is_q) {
5770         clear_vec_high(s, rd);
5771     }
5772
5773     tcg_temp_free_i64(tmp);
5774 }
5775
5776 /* C6.3.31 DUP (element, scalar)
5777  *  31                   21 20    16 15        10  9    5 4    0
5778  * +-----------------------+--------+-------------+------+------+
5779  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
5780  * +-----------------------+--------+-------------+------+------+
5781  */
5782 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
5783                               int imm5)
5784 {
5785     int size = ctz32(imm5);
5786     int index;
5787     TCGv_i64 tmp;
5788
5789     if (size > 3) {
5790         unallocated_encoding(s);
5791         return;
5792     }
5793
5794     if (!fp_access_check(s)) {
5795         return;
5796     }
5797
5798     index = imm5 >> (size + 1);
5799
5800     /* This instruction just extracts the specified element and
5801      * zero-extends it into the bottom of the destination register.
5802      */
5803     tmp = tcg_temp_new_i64();
5804     read_vec_element(s, tmp, rn, index, size);
5805     write_fp_dreg(s, rd, tmp);
5806     tcg_temp_free_i64(tmp);
5807 }
5808
5809 /* C6.3.32 DUP (General)
5810  *
5811  *  31  30   29              21 20    16 15        10  9    5 4    0
5812  * +---+---+-------------------+--------+-------------+------+------+
5813  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
5814  * +---+---+-------------------+--------+-------------+------+------+
5815  *
5816  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5817  */
5818 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
5819                              int imm5)
5820 {
5821     int size = ctz32(imm5);
5822     int esize = 8 << size;
5823     int elements = (is_q ? 128 : 64)/esize;
5824     int i = 0;
5825
5826     if (size > 3 || ((size == 3) && !is_q)) {
5827         unallocated_encoding(s);
5828         return;
5829     }
5830
5831     if (!fp_access_check(s)) {
5832         return;
5833     }
5834
5835     for (i = 0; i < elements; i++) {
5836         write_vec_element(s, cpu_reg(s, rn), rd, i, size);
5837     }
5838     if (!is_q) {
5839         clear_vec_high(s, rd);
5840     }
5841 }
5842
5843 /* C6.3.150 INS (Element)
5844  *
5845  *  31                   21 20    16 15  14    11  10 9    5 4    0
5846  * +-----------------------+--------+------------+---+------+------+
5847  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
5848  * +-----------------------+--------+------------+---+------+------+
5849  *
5850  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5851  * index: encoded in imm5<4:size+1>
5852  */
5853 static void handle_simd_inse(DisasContext *s, int rd, int rn,
5854                              int imm4, int imm5)
5855 {
5856     int size = ctz32(imm5);
5857     int src_index, dst_index;
5858     TCGv_i64 tmp;
5859
5860     if (size > 3) {
5861         unallocated_encoding(s);
5862         return;
5863     }
5864
5865     if (!fp_access_check(s)) {
5866         return;
5867     }
5868
5869     dst_index = extract32(imm5, 1+size, 5);
5870     src_index = extract32(imm4, size, 4);
5871
5872     tmp = tcg_temp_new_i64();
5873
5874     read_vec_element(s, tmp, rn, src_index, size);
5875     write_vec_element(s, tmp, rd, dst_index, size);
5876
5877     tcg_temp_free_i64(tmp);
5878 }
5879
5880
5881 /* C6.3.151 INS (General)
5882  *
5883  *  31                   21 20    16 15        10  9    5 4    0
5884  * +-----------------------+--------+-------------+------+------+
5885  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
5886  * +-----------------------+--------+-------------+------+------+
5887  *
5888  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5889  * index: encoded in imm5<4:size+1>
5890  */
5891 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
5892 {
5893     int size = ctz32(imm5);
5894     int idx;
5895
5896     if (size > 3) {
5897         unallocated_encoding(s);
5898         return;
5899     }
5900
5901     if (!fp_access_check(s)) {
5902         return;
5903     }
5904
5905     idx = extract32(imm5, 1 + size, 4 - size);
5906     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
5907 }
5908
5909 /*
5910  * C6.3.321 UMOV (General)
5911  * C6.3.237 SMOV (General)
5912  *
5913  *  31  30   29              21 20    16 15    12   10 9    5 4    0
5914  * +---+---+-------------------+--------+-------------+------+------+
5915  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
5916  * +---+---+-------------------+--------+-------------+------+------+
5917  *
5918  * U: unsigned when set
5919  * size: encoded in imm5 (see ARM ARM LowestSetBit())
5920  */
5921 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
5922                                   int rn, int rd, int imm5)
5923 {
5924     int size = ctz32(imm5);
5925     int element;
5926     TCGv_i64 tcg_rd;
5927
5928     /* Check for UnallocatedEncodings */
5929     if (is_signed) {
5930         if (size > 2 || (size == 2 && !is_q)) {
5931             unallocated_encoding(s);
5932             return;
5933         }
5934     } else {
5935         if (size > 3
5936             || (size < 3 && is_q)
5937             || (size == 3 && !is_q)) {
5938             unallocated_encoding(s);
5939             return;
5940         }
5941     }
5942
5943     if (!fp_access_check(s)) {
5944         return;
5945     }
5946
5947     element = extract32(imm5, 1+size, 4);
5948
5949     tcg_rd = cpu_reg(s, rd);
5950     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
5951     if (is_signed && !is_q) {
5952         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
5953     }
5954 }
5955
5956 /* C3.6.5 AdvSIMD copy
5957  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
5958  * +---+---+----+-----------------+------+---+------+---+------+------+
5959  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
5960  * +---+---+----+-----------------+------+---+------+---+------+------+
5961  */
5962 static void disas_simd_copy(DisasContext *s, uint32_t insn)
5963 {
5964     int rd = extract32(insn, 0, 5);
5965     int rn = extract32(insn, 5, 5);
5966     int imm4 = extract32(insn, 11, 4);
5967     int op = extract32(insn, 29, 1);
5968     int is_q = extract32(insn, 30, 1);
5969     int imm5 = extract32(insn, 16, 5);
5970
5971     if (op) {
5972         if (is_q) {
5973             /* INS (element) */
5974             handle_simd_inse(s, rd, rn, imm4, imm5);
5975         } else {
5976             unallocated_encoding(s);
5977         }
5978     } else {
5979         switch (imm4) {
5980         case 0:
5981             /* DUP (element - vector) */
5982             handle_simd_dupe(s, is_q, rd, rn, imm5);
5983             break;
5984         case 1:
5985             /* DUP (general) */
5986             handle_simd_dupg(s, is_q, rd, rn, imm5);
5987             break;
5988         case 3:
5989             if (is_q) {
5990                 /* INS (general) */
5991                 handle_simd_insg(s, rd, rn, imm5);
5992             } else {
5993                 unallocated_encoding(s);
5994             }
5995             break;
5996         case 5:
5997         case 7:
5998             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
5999             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6000             break;
6001         default:
6002             unallocated_encoding(s);
6003             break;
6004         }
6005     }
6006 }
6007
6008 /* C3.6.6 AdvSIMD modified immediate
6009  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6010  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6011  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6012  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6013  *
6014  * There are a number of operations that can be carried out here:
6015  *   MOVI - move (shifted) imm into register
6016  *   MVNI - move inverted (shifted) imm into register
6017  *   ORR  - bitwise OR of (shifted) imm with register
6018  *   BIC  - bitwise clear of (shifted) imm with register
6019  */
6020 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6021 {
6022     int rd = extract32(insn, 0, 5);
6023     int cmode = extract32(insn, 12, 4);
6024     int cmode_3_1 = extract32(cmode, 1, 3);
6025     int cmode_0 = extract32(cmode, 0, 1);
6026     int o2 = extract32(insn, 11, 1);
6027     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6028     bool is_neg = extract32(insn, 29, 1);
6029     bool is_q = extract32(insn, 30, 1);
6030     uint64_t imm = 0;
6031     TCGv_i64 tcg_rd, tcg_imm;
6032     int i;
6033
6034     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6035         unallocated_encoding(s);
6036         return;
6037     }
6038
6039     if (!fp_access_check(s)) {
6040         return;
6041     }
6042
6043     /* See AdvSIMDExpandImm() in ARM ARM */
6044     switch (cmode_3_1) {
6045     case 0: /* Replicate(Zeros(24):imm8, 2) */
6046     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6047     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6048     case 3: /* Replicate(imm8:Zeros(24), 2) */
6049     {
6050         int shift = cmode_3_1 * 8;
6051         imm = bitfield_replicate(abcdefgh << shift, 32);
6052         break;
6053     }
6054     case 4: /* Replicate(Zeros(8):imm8, 4) */
6055     case 5: /* Replicate(imm8:Zeros(8), 4) */
6056     {
6057         int shift = (cmode_3_1 & 0x1) * 8;
6058         imm = bitfield_replicate(abcdefgh << shift, 16);
6059         break;
6060     }
6061     case 6:
6062         if (cmode_0) {
6063             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6064             imm = (abcdefgh << 16) | 0xffff;
6065         } else {
6066             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6067             imm = (abcdefgh << 8) | 0xff;
6068         }
6069         imm = bitfield_replicate(imm, 32);
6070         break;
6071     case 7:
6072         if (!cmode_0 && !is_neg) {
6073             imm = bitfield_replicate(abcdefgh, 8);
6074         } else if (!cmode_0 && is_neg) {
6075             int i;
6076             imm = 0;
6077             for (i = 0; i < 8; i++) {
6078                 if ((abcdefgh) & (1 << i)) {
6079                     imm |= 0xffULL << (i * 8);
6080                 }
6081             }
6082         } else if (cmode_0) {
6083             if (is_neg) {
6084                 imm = (abcdefgh & 0x3f) << 48;
6085                 if (abcdefgh & 0x80) {
6086                     imm |= 0x8000000000000000ULL;
6087                 }
6088                 if (abcdefgh & 0x40) {
6089                     imm |= 0x3fc0000000000000ULL;
6090                 } else {
6091                     imm |= 0x4000000000000000ULL;
6092                 }
6093             } else {
6094                 imm = (abcdefgh & 0x3f) << 19;
6095                 if (abcdefgh & 0x80) {
6096                     imm |= 0x80000000;
6097                 }
6098                 if (abcdefgh & 0x40) {
6099                     imm |= 0x3e000000;
6100                 } else {
6101                     imm |= 0x40000000;
6102                 }
6103                 imm |= (imm << 32);
6104             }
6105         }
6106         break;
6107     }
6108
6109     if (cmode_3_1 != 7 && is_neg) {
6110         imm = ~imm;
6111     }
6112
6113     tcg_imm = tcg_const_i64(imm);
6114     tcg_rd = new_tmp_a64(s);
6115
6116     for (i = 0; i < 2; i++) {
6117         int foffs = i ? fp_reg_hi_offset(s, rd) : fp_reg_offset(s, rd, MO_64);
6118
6119         if (i == 1 && !is_q) {
6120             /* non-quad ops clear high half of vector */
6121             tcg_gen_movi_i64(tcg_rd, 0);
6122         } else if ((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9) {
6123             tcg_gen_ld_i64(tcg_rd, cpu_env, foffs);
6124             if (is_neg) {
6125                 /* AND (BIC) */
6126                 tcg_gen_and_i64(tcg_rd, tcg_rd, tcg_imm);
6127             } else {
6128                 /* ORR */
6129                 tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_imm);
6130             }
6131         } else {
6132             /* MOVI */
6133             tcg_gen_mov_i64(tcg_rd, tcg_imm);
6134         }
6135         tcg_gen_st_i64(tcg_rd, cpu_env, foffs);
6136     }
6137
6138     tcg_temp_free_i64(tcg_imm);
6139 }
6140
6141 /* C3.6.7 AdvSIMD scalar copy
6142  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6143  * +-----+----+-----------------+------+---+------+---+------+------+
6144  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6145  * +-----+----+-----------------+------+---+------+---+------+------+
6146  */
6147 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6148 {
6149     int rd = extract32(insn, 0, 5);
6150     int rn = extract32(insn, 5, 5);
6151     int imm4 = extract32(insn, 11, 4);
6152     int imm5 = extract32(insn, 16, 5);
6153     int op = extract32(insn, 29, 1);
6154
6155     if (op != 0 || imm4 != 0) {
6156         unallocated_encoding(s);
6157         return;
6158     }
6159
6160     /* DUP (element, scalar) */
6161     handle_simd_dupes(s, rd, rn, imm5);
6162 }
6163
6164 /* C3.6.8 AdvSIMD scalar pairwise
6165  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6166  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6167  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6168  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6169  */
6170 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6171 {
6172     int u = extract32(insn, 29, 1);
6173     int size = extract32(insn, 22, 2);
6174     int opcode = extract32(insn, 12, 5);
6175     int rn = extract32(insn, 5, 5);
6176     int rd = extract32(insn, 0, 5);
6177     TCGv_ptr fpst;
6178
6179     /* For some ops (the FP ones), size[1] is part of the encoding.
6180      * For ADDP strictly it is not but size[1] is always 1 for valid
6181      * encodings.
6182      */
6183     opcode |= (extract32(size, 1, 1) << 5);
6184
6185     switch (opcode) {
6186     case 0x3b: /* ADDP */
6187         if (u || size != 3) {
6188             unallocated_encoding(s);
6189             return;
6190         }
6191         if (!fp_access_check(s)) {
6192             return;
6193         }
6194
6195         TCGV_UNUSED_PTR(fpst);
6196         break;
6197     case 0xc: /* FMAXNMP */
6198     case 0xd: /* FADDP */
6199     case 0xf: /* FMAXP */
6200     case 0x2c: /* FMINNMP */
6201     case 0x2f: /* FMINP */
6202         /* FP op, size[0] is 32 or 64 bit */
6203         if (!u) {
6204             unallocated_encoding(s);
6205             return;
6206         }
6207         if (!fp_access_check(s)) {
6208             return;
6209         }
6210
6211         size = extract32(size, 0, 1) ? 3 : 2;
6212         fpst = get_fpstatus_ptr();
6213         break;
6214     default:
6215         unallocated_encoding(s);
6216         return;
6217     }
6218
6219     if (size == 3) {
6220         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6221         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6222         TCGv_i64 tcg_res = tcg_temp_new_i64();
6223
6224         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6225         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6226
6227         switch (opcode) {
6228         case 0x3b: /* ADDP */
6229             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6230             break;
6231         case 0xc: /* FMAXNMP */
6232             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6233             break;
6234         case 0xd: /* FADDP */
6235             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6236             break;
6237         case 0xf: /* FMAXP */
6238             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6239             break;
6240         case 0x2c: /* FMINNMP */
6241             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6242             break;
6243         case 0x2f: /* FMINP */
6244             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6245             break;
6246         default:
6247             g_assert_not_reached();
6248         }
6249
6250         write_fp_dreg(s, rd, tcg_res);
6251
6252         tcg_temp_free_i64(tcg_op1);
6253         tcg_temp_free_i64(tcg_op2);
6254         tcg_temp_free_i64(tcg_res);
6255     } else {
6256         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6257         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6258         TCGv_i32 tcg_res = tcg_temp_new_i32();
6259
6260         read_vec_element_i32(s, tcg_op1, rn, 0, MO_32);
6261         read_vec_element_i32(s, tcg_op2, rn, 1, MO_32);
6262
6263         switch (opcode) {
6264         case 0xc: /* FMAXNMP */
6265             gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6266             break;
6267         case 0xd: /* FADDP */
6268             gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6269             break;
6270         case 0xf: /* FMAXP */
6271             gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6272             break;
6273         case 0x2c: /* FMINNMP */
6274             gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6275             break;
6276         case 0x2f: /* FMINP */
6277             gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6278             break;
6279         default:
6280             g_assert_not_reached();
6281         }
6282
6283         write_fp_sreg(s, rd, tcg_res);
6284
6285         tcg_temp_free_i32(tcg_op1);
6286         tcg_temp_free_i32(tcg_op2);
6287         tcg_temp_free_i32(tcg_res);
6288     }
6289
6290     if (!TCGV_IS_UNUSED_PTR(fpst)) {
6291         tcg_temp_free_ptr(fpst);
6292     }
6293 }
6294
6295 /*
6296  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6297  *
6298  * This code is handles the common shifting code and is used by both
6299  * the vector and scalar code.
6300  */
6301 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6302                                     TCGv_i64 tcg_rnd, bool accumulate,
6303                                     bool is_u, int size, int shift)
6304 {
6305     bool extended_result = false;
6306     bool round = !TCGV_IS_UNUSED_I64(tcg_rnd);
6307     int ext_lshift = 0;
6308     TCGv_i64 tcg_src_hi;
6309
6310     if (round && size == 3) {
6311         extended_result = true;
6312         ext_lshift = 64 - shift;
6313         tcg_src_hi = tcg_temp_new_i64();
6314     } else if (shift == 64) {
6315         if (!accumulate && is_u) {
6316             /* result is zero */
6317             tcg_gen_movi_i64(tcg_res, 0);
6318             return;
6319         }
6320     }
6321
6322     /* Deal with the rounding step */
6323     if (round) {
6324         if (extended_result) {
6325             TCGv_i64 tcg_zero = tcg_const_i64(0);
6326             if (!is_u) {
6327                 /* take care of sign extending tcg_res */
6328                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6329                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6330                                  tcg_src, tcg_src_hi,
6331                                  tcg_rnd, tcg_zero);
6332             } else {
6333                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6334                                  tcg_src, tcg_zero,
6335                                  tcg_rnd, tcg_zero);
6336             }
6337             tcg_temp_free_i64(tcg_zero);
6338         } else {
6339             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6340         }
6341     }
6342
6343     /* Now do the shift right */
6344     if (round && extended_result) {
6345         /* extended case, >64 bit precision required */
6346         if (ext_lshift == 0) {
6347             /* special case, only high bits matter */
6348             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6349         } else {
6350             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6351             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6352             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6353         }
6354     } else {
6355         if (is_u) {
6356             if (shift == 64) {
6357                 /* essentially shifting in 64 zeros */
6358                 tcg_gen_movi_i64(tcg_src, 0);
6359             } else {
6360                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6361             }
6362         } else {
6363             if (shift == 64) {
6364                 /* effectively extending the sign-bit */
6365                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6366             } else {
6367                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6368             }
6369         }
6370     }
6371
6372     if (accumulate) {
6373         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6374     } else {
6375         tcg_gen_mov_i64(tcg_res, tcg_src);
6376     }
6377
6378     if (extended_result) {
6379         tcg_temp_free_i64(tcg_src_hi);
6380     }
6381 }
6382
6383 /* Common SHL/SLI - Shift left with an optional insert */
6384 static void handle_shli_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6385                                  bool insert, int shift)
6386 {
6387     if (insert) { /* SLI */
6388         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, shift, 64 - shift);
6389     } else { /* SHL */
6390         tcg_gen_shli_i64(tcg_res, tcg_src, shift);
6391     }
6392 }
6393
6394 /* SRI: shift right with insert */
6395 static void handle_shri_with_ins(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6396                                  int size, int shift)
6397 {
6398     int esize = 8 << size;
6399
6400     /* shift count same as element size is valid but does nothing;
6401      * special case to avoid potential shift by 64.
6402      */
6403     if (shift != esize) {
6404         tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6405         tcg_gen_deposit_i64(tcg_res, tcg_res, tcg_src, 0, esize - shift);
6406     }
6407 }
6408
6409 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6410 static void handle_scalar_simd_shri(DisasContext *s,
6411                                     bool is_u, int immh, int immb,
6412                                     int opcode, int rn, int rd)
6413 {
6414     const int size = 3;
6415     int immhb = immh << 3 | immb;
6416     int shift = 2 * (8 << size) - immhb;
6417     bool accumulate = false;
6418     bool round = false;
6419     bool insert = false;
6420     TCGv_i64 tcg_rn;
6421     TCGv_i64 tcg_rd;
6422     TCGv_i64 tcg_round;
6423
6424     if (!extract32(immh, 3, 1)) {
6425         unallocated_encoding(s);
6426         return;
6427     }
6428
6429     if (!fp_access_check(s)) {
6430         return;
6431     }
6432
6433     switch (opcode) {
6434     case 0x02: /* SSRA / USRA (accumulate) */
6435         accumulate = true;
6436         break;
6437     case 0x04: /* SRSHR / URSHR (rounding) */
6438         round = true;
6439         break;
6440     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6441         accumulate = round = true;
6442         break;
6443     case 0x08: /* SRI */
6444         insert = true;
6445         break;
6446     }
6447
6448     if (round) {
6449         uint64_t round_const = 1ULL << (shift - 1);
6450         tcg_round = tcg_const_i64(round_const);
6451     } else {
6452         TCGV_UNUSED_I64(tcg_round);
6453     }
6454
6455     tcg_rn = read_fp_dreg(s, rn);
6456     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6457
6458     if (insert) {
6459         handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
6460     } else {
6461         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6462                                 accumulate, is_u, size, shift);
6463     }
6464
6465     write_fp_dreg(s, rd, tcg_rd);
6466
6467     tcg_temp_free_i64(tcg_rn);
6468     tcg_temp_free_i64(tcg_rd);
6469     if (round) {
6470         tcg_temp_free_i64(tcg_round);
6471     }
6472 }
6473
6474 /* SHL/SLI - Scalar shift left */
6475 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6476                                     int immh, int immb, int opcode,
6477                                     int rn, int rd)
6478 {
6479     int size = 32 - clz32(immh) - 1;
6480     int immhb = immh << 3 | immb;
6481     int shift = immhb - (8 << size);
6482     TCGv_i64 tcg_rn = new_tmp_a64(s);
6483     TCGv_i64 tcg_rd = new_tmp_a64(s);
6484
6485     if (!extract32(immh, 3, 1)) {
6486         unallocated_encoding(s);
6487         return;
6488     }
6489
6490     if (!fp_access_check(s)) {
6491         return;
6492     }
6493
6494     tcg_rn = read_fp_dreg(s, rn);
6495     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6496
6497     handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
6498
6499     write_fp_dreg(s, rd, tcg_rd);
6500
6501     tcg_temp_free_i64(tcg_rn);
6502     tcg_temp_free_i64(tcg_rd);
6503 }
6504
6505 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6506  * (signed/unsigned) narrowing */
6507 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6508                                    bool is_u_shift, bool is_u_narrow,
6509                                    int immh, int immb, int opcode,
6510                                    int rn, int rd)
6511 {
6512     int immhb = immh << 3 | immb;
6513     int size = 32 - clz32(immh) - 1;
6514     int esize = 8 << size;
6515     int shift = (2 * esize) - immhb;
6516     int elements = is_scalar ? 1 : (64 / esize);
6517     bool round = extract32(opcode, 0, 1);
6518     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6519     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6520     TCGv_i32 tcg_rd_narrowed;
6521     TCGv_i64 tcg_final;
6522
6523     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6524         { gen_helper_neon_narrow_sat_s8,
6525           gen_helper_neon_unarrow_sat8 },
6526         { gen_helper_neon_narrow_sat_s16,
6527           gen_helper_neon_unarrow_sat16 },
6528         { gen_helper_neon_narrow_sat_s32,
6529           gen_helper_neon_unarrow_sat32 },
6530         { NULL, NULL },
6531     };
6532     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6533         gen_helper_neon_narrow_sat_u8,
6534         gen_helper_neon_narrow_sat_u16,
6535         gen_helper_neon_narrow_sat_u32,
6536         NULL
6537     };
6538     NeonGenNarrowEnvFn *narrowfn;
6539
6540     int i;
6541
6542     assert(size < 4);
6543
6544     if (extract32(immh, 3, 1)) {
6545         unallocated_encoding(s);
6546         return;
6547     }
6548
6549     if (!fp_access_check(s)) {
6550         return;
6551     }
6552
6553     if (is_u_shift) {
6554         narrowfn = unsigned_narrow_fns[size];
6555     } else {
6556         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6557     }
6558
6559     tcg_rn = tcg_temp_new_i64();
6560     tcg_rd = tcg_temp_new_i64();
6561     tcg_rd_narrowed = tcg_temp_new_i32();
6562     tcg_final = tcg_const_i64(0);
6563
6564     if (round) {
6565         uint64_t round_const = 1ULL << (shift - 1);
6566         tcg_round = tcg_const_i64(round_const);
6567     } else {
6568         TCGV_UNUSED_I64(tcg_round);
6569     }
6570
6571     for (i = 0; i < elements; i++) {
6572         read_vec_element(s, tcg_rn, rn, i, ldop);
6573         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6574                                 false, is_u_shift, size+1, shift);
6575         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6576         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6577         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6578     }
6579
6580     if (!is_q) {
6581         clear_vec_high(s, rd);
6582         write_vec_element(s, tcg_final, rd, 0, MO_64);
6583     } else {
6584         write_vec_element(s, tcg_final, rd, 1, MO_64);
6585     }
6586
6587     if (round) {
6588         tcg_temp_free_i64(tcg_round);
6589     }
6590     tcg_temp_free_i64(tcg_rn);
6591     tcg_temp_free_i64(tcg_rd);
6592     tcg_temp_free_i32(tcg_rd_narrowed);
6593     tcg_temp_free_i64(tcg_final);
6594     return;
6595 }
6596
6597 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6598 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6599                              bool src_unsigned, bool dst_unsigned,
6600                              int immh, int immb, int rn, int rd)
6601 {
6602     int immhb = immh << 3 | immb;
6603     int size = 32 - clz32(immh) - 1;
6604     int shift = immhb - (8 << size);
6605     int pass;
6606
6607     assert(immh != 0);
6608     assert(!(scalar && is_q));
6609
6610     if (!scalar) {
6611         if (!is_q && extract32(immh, 3, 1)) {
6612             unallocated_encoding(s);
6613             return;
6614         }
6615
6616         /* Since we use the variable-shift helpers we must
6617          * replicate the shift count into each element of
6618          * the tcg_shift value.
6619          */
6620         switch (size) {
6621         case 0:
6622             shift |= shift << 8;
6623             /* fall through */
6624         case 1:
6625             shift |= shift << 16;
6626             break;
6627         case 2:
6628         case 3:
6629             break;
6630         default:
6631             g_assert_not_reached();
6632         }
6633     }
6634
6635     if (!fp_access_check(s)) {
6636         return;
6637     }
6638
6639     if (size == 3) {
6640         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6641         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6642             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6643             { NULL, gen_helper_neon_qshl_u64 },
6644         };
6645         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6646         int maxpass = is_q ? 2 : 1;
6647
6648         for (pass = 0; pass < maxpass; pass++) {
6649             TCGv_i64 tcg_op = tcg_temp_new_i64();
6650
6651             read_vec_element(s, tcg_op, rn, pass, MO_64);
6652             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6653             write_vec_element(s, tcg_op, rd, pass, MO_64);
6654
6655             tcg_temp_free_i64(tcg_op);
6656         }
6657         tcg_temp_free_i64(tcg_shift);
6658
6659         if (!is_q) {
6660             clear_vec_high(s, rd);
6661         }
6662     } else {
6663         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6664         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6665             {
6666                 { gen_helper_neon_qshl_s8,
6667                   gen_helper_neon_qshl_s16,
6668                   gen_helper_neon_qshl_s32 },
6669                 { gen_helper_neon_qshlu_s8,
6670                   gen_helper_neon_qshlu_s16,
6671                   gen_helper_neon_qshlu_s32 }
6672             }, {
6673                 { NULL, NULL, NULL },
6674                 { gen_helper_neon_qshl_u8,
6675                   gen_helper_neon_qshl_u16,
6676                   gen_helper_neon_qshl_u32 }
6677             }
6678         };
6679         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
6680         TCGMemOp memop = scalar ? size : MO_32;
6681         int maxpass = scalar ? 1 : is_q ? 4 : 2;
6682
6683         for (pass = 0; pass < maxpass; pass++) {
6684             TCGv_i32 tcg_op = tcg_temp_new_i32();
6685
6686             read_vec_element_i32(s, tcg_op, rn, pass, memop);
6687             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6688             if (scalar) {
6689                 switch (size) {
6690                 case 0:
6691                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
6692                     break;
6693                 case 1:
6694                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
6695                     break;
6696                 case 2:
6697                     break;
6698                 default:
6699                     g_assert_not_reached();
6700                 }
6701                 write_fp_sreg(s, rd, tcg_op);
6702             } else {
6703                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6704             }
6705
6706             tcg_temp_free_i32(tcg_op);
6707         }
6708         tcg_temp_free_i32(tcg_shift);
6709
6710         if (!is_q && !scalar) {
6711             clear_vec_high(s, rd);
6712         }
6713     }
6714 }
6715
6716 /* Common vector code for handling integer to FP conversion */
6717 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
6718                                    int elements, int is_signed,
6719                                    int fracbits, int size)
6720 {
6721     bool is_double = size == 3 ? true : false;
6722     TCGv_ptr tcg_fpst = get_fpstatus_ptr();
6723     TCGv_i32 tcg_shift = tcg_const_i32(fracbits);
6724     TCGv_i64 tcg_int = tcg_temp_new_i64();
6725     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
6726     int pass;
6727
6728     for (pass = 0; pass < elements; pass++) {
6729         read_vec_element(s, tcg_int, rn, pass, mop);
6730
6731         if (is_double) {
6732             TCGv_i64 tcg_double = tcg_temp_new_i64();
6733             if (is_signed) {
6734                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
6735                                      tcg_shift, tcg_fpst);
6736             } else {
6737                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
6738                                      tcg_shift, tcg_fpst);
6739             }
6740             if (elements == 1) {
6741                 write_fp_dreg(s, rd, tcg_double);
6742             } else {
6743                 write_vec_element(s, tcg_double, rd, pass, MO_64);
6744             }
6745             tcg_temp_free_i64(tcg_double);
6746         } else {
6747             TCGv_i32 tcg_single = tcg_temp_new_i32();
6748             if (is_signed) {
6749                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
6750                                      tcg_shift, tcg_fpst);
6751             } else {
6752                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
6753                                      tcg_shift, tcg_fpst);
6754             }
6755             if (elements == 1) {
6756                 write_fp_sreg(s, rd, tcg_single);
6757             } else {
6758                 write_vec_element_i32(s, tcg_single, rd, pass, MO_32);
6759             }
6760             tcg_temp_free_i32(tcg_single);
6761         }
6762     }
6763
6764     if (!is_double && elements == 2) {
6765         clear_vec_high(s, rd);
6766     }
6767
6768     tcg_temp_free_i64(tcg_int);
6769     tcg_temp_free_ptr(tcg_fpst);
6770     tcg_temp_free_i32(tcg_shift);
6771 }
6772
6773 /* UCVTF/SCVTF - Integer to FP conversion */
6774 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
6775                                          bool is_q, bool is_u,
6776                                          int immh, int immb, int opcode,
6777                                          int rn, int rd)
6778 {
6779     bool is_double = extract32(immh, 3, 1);
6780     int size = is_double ? MO_64 : MO_32;
6781     int elements;
6782     int immhb = immh << 3 | immb;
6783     int fracbits = (is_double ? 128 : 64) - immhb;
6784
6785     if (!extract32(immh, 2, 2)) {
6786         unallocated_encoding(s);
6787         return;
6788     }
6789
6790     if (is_scalar) {
6791         elements = 1;
6792     } else {
6793         elements = is_double ? 2 : is_q ? 4 : 2;
6794         if (is_double && !is_q) {
6795             unallocated_encoding(s);
6796             return;
6797         }
6798     }
6799
6800     if (!fp_access_check(s)) {
6801         return;
6802     }
6803
6804     /* immh == 0 would be a failure of the decode logic */
6805     g_assert(immh);
6806
6807     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
6808 }
6809
6810 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
6811 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
6812                                          bool is_q, bool is_u,
6813                                          int immh, int immb, int rn, int rd)
6814 {
6815     bool is_double = extract32(immh, 3, 1);
6816     int immhb = immh << 3 | immb;
6817     int fracbits = (is_double ? 128 : 64) - immhb;
6818     int pass;
6819     TCGv_ptr tcg_fpstatus;
6820     TCGv_i32 tcg_rmode, tcg_shift;
6821
6822     if (!extract32(immh, 2, 2)) {
6823         unallocated_encoding(s);
6824         return;
6825     }
6826
6827     if (!is_scalar && !is_q && is_double) {
6828         unallocated_encoding(s);
6829         return;
6830     }
6831
6832     if (!fp_access_check(s)) {
6833         return;
6834     }
6835
6836     assert(!(is_scalar && is_q));
6837
6838     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
6839     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6840     tcg_fpstatus = get_fpstatus_ptr();
6841     tcg_shift = tcg_const_i32(fracbits);
6842
6843     if (is_double) {
6844         int maxpass = is_scalar ? 1 : 2;
6845
6846         for (pass = 0; pass < maxpass; pass++) {
6847             TCGv_i64 tcg_op = tcg_temp_new_i64();
6848
6849             read_vec_element(s, tcg_op, rn, pass, MO_64);
6850             if (is_u) {
6851                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6852             } else {
6853                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6854             }
6855             write_vec_element(s, tcg_op, rd, pass, MO_64);
6856             tcg_temp_free_i64(tcg_op);
6857         }
6858         if (!is_q) {
6859             clear_vec_high(s, rd);
6860         }
6861     } else {
6862         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
6863         for (pass = 0; pass < maxpass; pass++) {
6864             TCGv_i32 tcg_op = tcg_temp_new_i32();
6865
6866             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
6867             if (is_u) {
6868                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6869             } else {
6870                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
6871             }
6872             if (is_scalar) {
6873                 write_fp_sreg(s, rd, tcg_op);
6874             } else {
6875                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
6876             }
6877             tcg_temp_free_i32(tcg_op);
6878         }
6879         if (!is_q && !is_scalar) {
6880             clear_vec_high(s, rd);
6881         }
6882     }
6883
6884     tcg_temp_free_ptr(tcg_fpstatus);
6885     tcg_temp_free_i32(tcg_shift);
6886     gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
6887     tcg_temp_free_i32(tcg_rmode);
6888 }
6889
6890 /* C3.6.9 AdvSIMD scalar shift by immediate
6891  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
6892  * +-----+---+-------------+------+------+--------+---+------+------+
6893  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
6894  * +-----+---+-------------+------+------+--------+---+------+------+
6895  *
6896  * This is the scalar version so it works on a fixed sized registers
6897  */
6898 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
6899 {
6900     int rd = extract32(insn, 0, 5);
6901     int rn = extract32(insn, 5, 5);
6902     int opcode = extract32(insn, 11, 5);
6903     int immb = extract32(insn, 16, 3);
6904     int immh = extract32(insn, 19, 4);
6905     bool is_u = extract32(insn, 29, 1);
6906
6907     if (immh == 0) {
6908         unallocated_encoding(s);
6909         return;
6910     }
6911
6912     switch (opcode) {
6913     case 0x08: /* SRI */
6914         if (!is_u) {
6915             unallocated_encoding(s);
6916             return;
6917         }
6918         /* fall through */
6919     case 0x00: /* SSHR / USHR */
6920     case 0x02: /* SSRA / USRA */
6921     case 0x04: /* SRSHR / URSHR */
6922     case 0x06: /* SRSRA / URSRA */
6923         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
6924         break;
6925     case 0x0a: /* SHL / SLI */
6926         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
6927         break;
6928     case 0x1c: /* SCVTF, UCVTF */
6929         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
6930                                      opcode, rn, rd);
6931         break;
6932     case 0x10: /* SQSHRUN, SQSHRUN2 */
6933     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
6934         if (!is_u) {
6935             unallocated_encoding(s);
6936             return;
6937         }
6938         handle_vec_simd_sqshrn(s, true, false, false, true,
6939                                immh, immb, opcode, rn, rd);
6940         break;
6941     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
6942     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
6943         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
6944                                immh, immb, opcode, rn, rd);
6945         break;
6946     case 0xc: /* SQSHLU */
6947         if (!is_u) {
6948             unallocated_encoding(s);
6949             return;
6950         }
6951         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
6952         break;
6953     case 0xe: /* SQSHL, UQSHL */
6954         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
6955         break;
6956     case 0x1f: /* FCVTZS, FCVTZU */
6957         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
6958         break;
6959     default:
6960         unallocated_encoding(s);
6961         break;
6962     }
6963 }
6964
6965 /* C3.6.10 AdvSIMD scalar three different
6966  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
6967  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6968  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
6969  * +-----+---+-----------+------+---+------+--------+-----+------+------+
6970  */
6971 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
6972 {
6973     bool is_u = extract32(insn, 29, 1);
6974     int size = extract32(insn, 22, 2);
6975     int opcode = extract32(insn, 12, 4);
6976     int rm = extract32(insn, 16, 5);
6977     int rn = extract32(insn, 5, 5);
6978     int rd = extract32(insn, 0, 5);
6979
6980     if (is_u) {
6981         unallocated_encoding(s);
6982         return;
6983     }
6984
6985     switch (opcode) {
6986     case 0x9: /* SQDMLAL, SQDMLAL2 */
6987     case 0xb: /* SQDMLSL, SQDMLSL2 */
6988     case 0xd: /* SQDMULL, SQDMULL2 */
6989         if (size == 0 || size == 3) {
6990             unallocated_encoding(s);
6991             return;
6992         }
6993         break;
6994     default:
6995         unallocated_encoding(s);
6996         return;
6997     }
6998
6999     if (!fp_access_check(s)) {
7000         return;
7001     }
7002
7003     if (size == 2) {
7004         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7005         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7006         TCGv_i64 tcg_res = tcg_temp_new_i64();
7007
7008         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7009         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7010
7011         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7012         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7013
7014         switch (opcode) {
7015         case 0xd: /* SQDMULL, SQDMULL2 */
7016             break;
7017         case 0xb: /* SQDMLSL, SQDMLSL2 */
7018             tcg_gen_neg_i64(tcg_res, tcg_res);
7019             /* fall through */
7020         case 0x9: /* SQDMLAL, SQDMLAL2 */
7021             read_vec_element(s, tcg_op1, rd, 0, MO_64);
7022             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7023                                               tcg_res, tcg_op1);
7024             break;
7025         default:
7026             g_assert_not_reached();
7027         }
7028
7029         write_fp_dreg(s, rd, tcg_res);
7030
7031         tcg_temp_free_i64(tcg_op1);
7032         tcg_temp_free_i64(tcg_op2);
7033         tcg_temp_free_i64(tcg_res);
7034     } else {
7035         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7036         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7037         TCGv_i64 tcg_res = tcg_temp_new_i64();
7038
7039         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7040         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7041
7042         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7043         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7044
7045         switch (opcode) {
7046         case 0xd: /* SQDMULL, SQDMULL2 */
7047             break;
7048         case 0xb: /* SQDMLSL, SQDMLSL2 */
7049             gen_helper_neon_negl_u32(tcg_res, tcg_res);
7050             /* fall through */
7051         case 0x9: /* SQDMLAL, SQDMLAL2 */
7052         {
7053             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7054             read_vec_element(s, tcg_op3, rd, 0, MO_32);
7055             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7056                                               tcg_res, tcg_op3);
7057             tcg_temp_free_i64(tcg_op3);
7058             break;
7059         }
7060         default:
7061             g_assert_not_reached();
7062         }
7063
7064         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7065         write_fp_dreg(s, rd, tcg_res);
7066
7067         tcg_temp_free_i32(tcg_op1);
7068         tcg_temp_free_i32(tcg_op2);
7069         tcg_temp_free_i64(tcg_res);
7070     }
7071 }
7072
7073 static void handle_3same_64(DisasContext *s, int opcode, bool u,
7074                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7075 {
7076     /* Handle 64x64->64 opcodes which are shared between the scalar
7077      * and vector 3-same groups. We cover every opcode where size == 3
7078      * is valid in either the three-reg-same (integer, not pairwise)
7079      * or scalar-three-reg-same groups. (Some opcodes are not yet
7080      * implemented.)
7081      */
7082     TCGCond cond;
7083
7084     switch (opcode) {
7085     case 0x1: /* SQADD */
7086         if (u) {
7087             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7088         } else {
7089             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7090         }
7091         break;
7092     case 0x5: /* SQSUB */
7093         if (u) {
7094             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7095         } else {
7096             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7097         }
7098         break;
7099     case 0x6: /* CMGT, CMHI */
7100         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7101          * We implement this using setcond (test) and then negating.
7102          */
7103         cond = u ? TCG_COND_GTU : TCG_COND_GT;
7104     do_cmop:
7105         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7106         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7107         break;
7108     case 0x7: /* CMGE, CMHS */
7109         cond = u ? TCG_COND_GEU : TCG_COND_GE;
7110         goto do_cmop;
7111     case 0x11: /* CMTST, CMEQ */
7112         if (u) {
7113             cond = TCG_COND_EQ;
7114             goto do_cmop;
7115         }
7116         /* CMTST : test is "if (X & Y != 0)". */
7117         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
7118         tcg_gen_setcondi_i64(TCG_COND_NE, tcg_rd, tcg_rd, 0);
7119         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7120         break;
7121     case 0x8: /* SSHL, USHL */
7122         if (u) {
7123             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7124         } else {
7125             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7126         }
7127         break;
7128     case 0x9: /* SQSHL, UQSHL */
7129         if (u) {
7130             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7131         } else {
7132             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7133         }
7134         break;
7135     case 0xa: /* SRSHL, URSHL */
7136         if (u) {
7137             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7138         } else {
7139             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7140         }
7141         break;
7142     case 0xb: /* SQRSHL, UQRSHL */
7143         if (u) {
7144             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7145         } else {
7146             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7147         }
7148         break;
7149     case 0x10: /* ADD, SUB */
7150         if (u) {
7151             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7152         } else {
7153             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7154         }
7155         break;
7156     default:
7157         g_assert_not_reached();
7158     }
7159 }
7160
7161 /* Handle the 3-same-operands float operations; shared by the scalar
7162  * and vector encodings. The caller must filter out any encodings
7163  * not allocated for the encoding it is dealing with.
7164  */
7165 static void handle_3same_float(DisasContext *s, int size, int elements,
7166                                int fpopcode, int rd, int rn, int rm)
7167 {
7168     int pass;
7169     TCGv_ptr fpst = get_fpstatus_ptr();
7170
7171     for (pass = 0; pass < elements; pass++) {
7172         if (size) {
7173             /* Double */
7174             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7175             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7176             TCGv_i64 tcg_res = tcg_temp_new_i64();
7177
7178             read_vec_element(s, tcg_op1, rn, pass, MO_64);
7179             read_vec_element(s, tcg_op2, rm, pass, MO_64);
7180
7181             switch (fpopcode) {
7182             case 0x39: /* FMLS */
7183                 /* As usual for ARM, separate negation for fused multiply-add */
7184                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
7185                 /* fall through */
7186             case 0x19: /* FMLA */
7187                 read_vec_element(s, tcg_res, rd, pass, MO_64);
7188                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
7189                                        tcg_res, fpst);
7190                 break;
7191             case 0x18: /* FMAXNM */
7192                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7193                 break;
7194             case 0x1a: /* FADD */
7195                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7196                 break;
7197             case 0x1b: /* FMULX */
7198                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
7199                 break;
7200             case 0x1c: /* FCMEQ */
7201                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7202                 break;
7203             case 0x1e: /* FMAX */
7204                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7205                 break;
7206             case 0x1f: /* FRECPS */
7207                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7208                 break;
7209             case 0x38: /* FMINNM */
7210                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7211                 break;
7212             case 0x3a: /* FSUB */
7213                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7214                 break;
7215             case 0x3e: /* FMIN */
7216                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7217                 break;
7218             case 0x3f: /* FRSQRTS */
7219                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7220                 break;
7221             case 0x5b: /* FMUL */
7222                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
7223                 break;
7224             case 0x5c: /* FCMGE */
7225                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7226                 break;
7227             case 0x5d: /* FACGE */
7228                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7229                 break;
7230             case 0x5f: /* FDIV */
7231                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
7232                 break;
7233             case 0x7a: /* FABD */
7234                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
7235                 gen_helper_vfp_absd(tcg_res, tcg_res);
7236                 break;
7237             case 0x7c: /* FCMGT */
7238                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7239                 break;
7240             case 0x7d: /* FACGT */
7241                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
7242                 break;
7243             default:
7244                 g_assert_not_reached();
7245             }
7246
7247             write_vec_element(s, tcg_res, rd, pass, MO_64);
7248
7249             tcg_temp_free_i64(tcg_res);
7250             tcg_temp_free_i64(tcg_op1);
7251             tcg_temp_free_i64(tcg_op2);
7252         } else {
7253             /* Single */
7254             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7255             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7256             TCGv_i32 tcg_res = tcg_temp_new_i32();
7257
7258             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
7259             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
7260
7261             switch (fpopcode) {
7262             case 0x39: /* FMLS */
7263                 /* As usual for ARM, separate negation for fused multiply-add */
7264                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
7265                 /* fall through */
7266             case 0x19: /* FMLA */
7267                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7268                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
7269                                        tcg_res, fpst);
7270                 break;
7271             case 0x1a: /* FADD */
7272                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7273                 break;
7274             case 0x1b: /* FMULX */
7275                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
7276                 break;
7277             case 0x1c: /* FCMEQ */
7278                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7279                 break;
7280             case 0x1e: /* FMAX */
7281                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7282                 break;
7283             case 0x1f: /* FRECPS */
7284                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7285                 break;
7286             case 0x18: /* FMAXNM */
7287                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7288                 break;
7289             case 0x38: /* FMINNM */
7290                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7291                 break;
7292             case 0x3a: /* FSUB */
7293                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7294                 break;
7295             case 0x3e: /* FMIN */
7296                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7297                 break;
7298             case 0x3f: /* FRSQRTS */
7299                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7300                 break;
7301             case 0x5b: /* FMUL */
7302                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
7303                 break;
7304             case 0x5c: /* FCMGE */
7305                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7306                 break;
7307             case 0x5d: /* FACGE */
7308                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7309                 break;
7310             case 0x5f: /* FDIV */
7311                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
7312                 break;
7313             case 0x7a: /* FABD */
7314                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
7315                 gen_helper_vfp_abss(tcg_res, tcg_res);
7316                 break;
7317             case 0x7c: /* FCMGT */
7318                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7319                 break;
7320             case 0x7d: /* FACGT */
7321                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
7322                 break;
7323             default:
7324                 g_assert_not_reached();
7325             }
7326
7327             if (elements == 1) {
7328                 /* scalar single so clear high part */
7329                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
7330
7331                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
7332                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
7333                 tcg_temp_free_i64(tcg_tmp);
7334             } else {
7335                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7336             }
7337
7338             tcg_temp_free_i32(tcg_res);
7339             tcg_temp_free_i32(tcg_op1);
7340             tcg_temp_free_i32(tcg_op2);
7341         }
7342     }
7343
7344     tcg_temp_free_ptr(fpst);
7345
7346     if ((elements << size) < 4) {
7347         /* scalar, or non-quad vector op */
7348         clear_vec_high(s, rd);
7349     }
7350 }
7351
7352 /* C3.6.11 AdvSIMD scalar three same
7353  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7354  * +-----+---+-----------+------+---+------+--------+---+------+------+
7355  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7356  * +-----+---+-----------+------+---+------+--------+---+------+------+
7357  */
7358 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7359 {
7360     int rd = extract32(insn, 0, 5);
7361     int rn = extract32(insn, 5, 5);
7362     int opcode = extract32(insn, 11, 5);
7363     int rm = extract32(insn, 16, 5);
7364     int size = extract32(insn, 22, 2);
7365     bool u = extract32(insn, 29, 1);
7366     TCGv_i64 tcg_rd;
7367
7368     if (opcode >= 0x18) {
7369         /* Floating point: U, size[1] and opcode indicate operation */
7370         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7371         switch (fpopcode) {
7372         case 0x1b: /* FMULX */
7373         case 0x1f: /* FRECPS */
7374         case 0x3f: /* FRSQRTS */
7375         case 0x5d: /* FACGE */
7376         case 0x7d: /* FACGT */
7377         case 0x1c: /* FCMEQ */
7378         case 0x5c: /* FCMGE */
7379         case 0x7c: /* FCMGT */
7380         case 0x7a: /* FABD */
7381             break;
7382         default:
7383             unallocated_encoding(s);
7384             return;
7385         }
7386
7387         if (!fp_access_check(s)) {
7388             return;
7389         }
7390
7391         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7392         return;
7393     }
7394
7395     switch (opcode) {
7396     case 0x1: /* SQADD, UQADD */
7397     case 0x5: /* SQSUB, UQSUB */
7398     case 0x9: /* SQSHL, UQSHL */
7399     case 0xb: /* SQRSHL, UQRSHL */
7400         break;
7401     case 0x8: /* SSHL, USHL */
7402     case 0xa: /* SRSHL, URSHL */
7403     case 0x6: /* CMGT, CMHI */
7404     case 0x7: /* CMGE, CMHS */
7405     case 0x11: /* CMTST, CMEQ */
7406     case 0x10: /* ADD, SUB (vector) */
7407         if (size != 3) {
7408             unallocated_encoding(s);
7409             return;
7410         }
7411         break;
7412     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7413         if (size != 1 && size != 2) {
7414             unallocated_encoding(s);
7415             return;
7416         }
7417         break;
7418     default:
7419         unallocated_encoding(s);
7420         return;
7421     }
7422
7423     if (!fp_access_check(s)) {
7424         return;
7425     }
7426
7427     tcg_rd = tcg_temp_new_i64();
7428
7429     if (size == 3) {
7430         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7431         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7432
7433         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7434         tcg_temp_free_i64(tcg_rn);
7435         tcg_temp_free_i64(tcg_rm);
7436     } else {
7437         /* Do a single operation on the lowest element in the vector.
7438          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7439          * no side effects for all these operations.
7440          * OPTME: special-purpose helpers would avoid doing some
7441          * unnecessary work in the helper for the 8 and 16 bit cases.
7442          */
7443         NeonGenTwoOpEnvFn *genenvfn;
7444         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7445         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7446         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7447
7448         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7449         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7450
7451         switch (opcode) {
7452         case 0x1: /* SQADD, UQADD */
7453         {
7454             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7455                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7456                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7457                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7458             };
7459             genenvfn = fns[size][u];
7460             break;
7461         }
7462         case 0x5: /* SQSUB, UQSUB */
7463         {
7464             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7465                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7466                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7467                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7468             };
7469             genenvfn = fns[size][u];
7470             break;
7471         }
7472         case 0x9: /* SQSHL, UQSHL */
7473         {
7474             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7475                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7476                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7477                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7478             };
7479             genenvfn = fns[size][u];
7480             break;
7481         }
7482         case 0xb: /* SQRSHL, UQRSHL */
7483         {
7484             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7485                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7486                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7487                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7488             };
7489             genenvfn = fns[size][u];
7490             break;
7491         }
7492         case 0x16: /* SQDMULH, SQRDMULH */
7493         {
7494             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7495                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7496                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7497             };
7498             assert(size == 1 || size == 2);
7499             genenvfn = fns[size - 1][u];
7500             break;
7501         }
7502         default:
7503             g_assert_not_reached();
7504         }
7505
7506         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7507         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7508         tcg_temp_free_i32(tcg_rd32);
7509         tcg_temp_free_i32(tcg_rn);
7510         tcg_temp_free_i32(tcg_rm);
7511     }
7512
7513     write_fp_dreg(s, rd, tcg_rd);
7514
7515     tcg_temp_free_i64(tcg_rd);
7516 }
7517
7518 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
7519                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
7520                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
7521 {
7522     /* Handle 64->64 opcodes which are shared between the scalar and
7523      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
7524      * is valid in either group and also the double-precision fp ops.
7525      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
7526      * requires them.
7527      */
7528     TCGCond cond;
7529
7530     switch (opcode) {
7531     case 0x4: /* CLS, CLZ */
7532         if (u) {
7533             gen_helper_clz64(tcg_rd, tcg_rn);
7534         } else {
7535             gen_helper_cls64(tcg_rd, tcg_rn);
7536         }
7537         break;
7538     case 0x5: /* NOT */
7539         /* This opcode is shared with CNT and RBIT but we have earlier
7540          * enforced that size == 3 if and only if this is the NOT insn.
7541          */
7542         tcg_gen_not_i64(tcg_rd, tcg_rn);
7543         break;
7544     case 0x7: /* SQABS, SQNEG */
7545         if (u) {
7546             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
7547         } else {
7548             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
7549         }
7550         break;
7551     case 0xa: /* CMLT */
7552         /* 64 bit integer comparison against zero, result is
7553          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
7554          * subtracting 1.
7555          */
7556         cond = TCG_COND_LT;
7557     do_cmop:
7558         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
7559         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7560         break;
7561     case 0x8: /* CMGT, CMGE */
7562         cond = u ? TCG_COND_GE : TCG_COND_GT;
7563         goto do_cmop;
7564     case 0x9: /* CMEQ, CMLE */
7565         cond = u ? TCG_COND_LE : TCG_COND_EQ;
7566         goto do_cmop;
7567     case 0xb: /* ABS, NEG */
7568         if (u) {
7569             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7570         } else {
7571             TCGv_i64 tcg_zero = tcg_const_i64(0);
7572             tcg_gen_neg_i64(tcg_rd, tcg_rn);
7573             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
7574                                 tcg_rn, tcg_rd);
7575             tcg_temp_free_i64(tcg_zero);
7576         }
7577         break;
7578     case 0x2f: /* FABS */
7579         gen_helper_vfp_absd(tcg_rd, tcg_rn);
7580         break;
7581     case 0x6f: /* FNEG */
7582         gen_helper_vfp_negd(tcg_rd, tcg_rn);
7583         break;
7584     case 0x7f: /* FSQRT */
7585         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
7586         break;
7587     case 0x1a: /* FCVTNS */
7588     case 0x1b: /* FCVTMS */
7589     case 0x1c: /* FCVTAS */
7590     case 0x3a: /* FCVTPS */
7591     case 0x3b: /* FCVTZS */
7592     {
7593         TCGv_i32 tcg_shift = tcg_const_i32(0);
7594         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7595         tcg_temp_free_i32(tcg_shift);
7596         break;
7597     }
7598     case 0x5a: /* FCVTNU */
7599     case 0x5b: /* FCVTMU */
7600     case 0x5c: /* FCVTAU */
7601     case 0x7a: /* FCVTPU */
7602     case 0x7b: /* FCVTZU */
7603     {
7604         TCGv_i32 tcg_shift = tcg_const_i32(0);
7605         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
7606         tcg_temp_free_i32(tcg_shift);
7607         break;
7608     }
7609     case 0x18: /* FRINTN */
7610     case 0x19: /* FRINTM */
7611     case 0x38: /* FRINTP */
7612     case 0x39: /* FRINTZ */
7613     case 0x58: /* FRINTA */
7614     case 0x79: /* FRINTI */
7615         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
7616         break;
7617     case 0x59: /* FRINTX */
7618         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
7619         break;
7620     default:
7621         g_assert_not_reached();
7622     }
7623 }
7624
7625 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
7626                                    bool is_scalar, bool is_u, bool is_q,
7627                                    int size, int rn, int rd)
7628 {
7629     bool is_double = (size == 3);
7630     TCGv_ptr fpst;
7631
7632     if (!fp_access_check(s)) {
7633         return;
7634     }
7635
7636     fpst = get_fpstatus_ptr();
7637
7638     if (is_double) {
7639         TCGv_i64 tcg_op = tcg_temp_new_i64();
7640         TCGv_i64 tcg_zero = tcg_const_i64(0);
7641         TCGv_i64 tcg_res = tcg_temp_new_i64();
7642         NeonGenTwoDoubleOPFn *genfn;
7643         bool swap = false;
7644         int pass;
7645
7646         switch (opcode) {
7647         case 0x2e: /* FCMLT (zero) */
7648             swap = true;
7649             /* fallthrough */
7650         case 0x2c: /* FCMGT (zero) */
7651             genfn = gen_helper_neon_cgt_f64;
7652             break;
7653         case 0x2d: /* FCMEQ (zero) */
7654             genfn = gen_helper_neon_ceq_f64;
7655             break;
7656         case 0x6d: /* FCMLE (zero) */
7657             swap = true;
7658             /* fall through */
7659         case 0x6c: /* FCMGE (zero) */
7660             genfn = gen_helper_neon_cge_f64;
7661             break;
7662         default:
7663             g_assert_not_reached();
7664         }
7665
7666         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7667             read_vec_element(s, tcg_op, rn, pass, MO_64);
7668             if (swap) {
7669                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7670             } else {
7671                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7672             }
7673             write_vec_element(s, tcg_res, rd, pass, MO_64);
7674         }
7675         if (is_scalar) {
7676             clear_vec_high(s, rd);
7677         }
7678
7679         tcg_temp_free_i64(tcg_res);
7680         tcg_temp_free_i64(tcg_zero);
7681         tcg_temp_free_i64(tcg_op);
7682     } else {
7683         TCGv_i32 tcg_op = tcg_temp_new_i32();
7684         TCGv_i32 tcg_zero = tcg_const_i32(0);
7685         TCGv_i32 tcg_res = tcg_temp_new_i32();
7686         NeonGenTwoSingleOPFn *genfn;
7687         bool swap = false;
7688         int pass, maxpasses;
7689
7690         switch (opcode) {
7691         case 0x2e: /* FCMLT (zero) */
7692             swap = true;
7693             /* fall through */
7694         case 0x2c: /* FCMGT (zero) */
7695             genfn = gen_helper_neon_cgt_f32;
7696             break;
7697         case 0x2d: /* FCMEQ (zero) */
7698             genfn = gen_helper_neon_ceq_f32;
7699             break;
7700         case 0x6d: /* FCMLE (zero) */
7701             swap = true;
7702             /* fall through */
7703         case 0x6c: /* FCMGE (zero) */
7704             genfn = gen_helper_neon_cge_f32;
7705             break;
7706         default:
7707             g_assert_not_reached();
7708         }
7709
7710         if (is_scalar) {
7711             maxpasses = 1;
7712         } else {
7713             maxpasses = is_q ? 4 : 2;
7714         }
7715
7716         for (pass = 0; pass < maxpasses; pass++) {
7717             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7718             if (swap) {
7719                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
7720             } else {
7721                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
7722             }
7723             if (is_scalar) {
7724                 write_fp_sreg(s, rd, tcg_res);
7725             } else {
7726                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7727             }
7728         }
7729         tcg_temp_free_i32(tcg_res);
7730         tcg_temp_free_i32(tcg_zero);
7731         tcg_temp_free_i32(tcg_op);
7732         if (!is_q && !is_scalar) {
7733             clear_vec_high(s, rd);
7734         }
7735     }
7736
7737     tcg_temp_free_ptr(fpst);
7738 }
7739
7740 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
7741                                     bool is_scalar, bool is_u, bool is_q,
7742                                     int size, int rn, int rd)
7743 {
7744     bool is_double = (size == 3);
7745     TCGv_ptr fpst = get_fpstatus_ptr();
7746
7747     if (is_double) {
7748         TCGv_i64 tcg_op = tcg_temp_new_i64();
7749         TCGv_i64 tcg_res = tcg_temp_new_i64();
7750         int pass;
7751
7752         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7753             read_vec_element(s, tcg_op, rn, pass, MO_64);
7754             switch (opcode) {
7755             case 0x3d: /* FRECPE */
7756                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
7757                 break;
7758             case 0x3f: /* FRECPX */
7759                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
7760                 break;
7761             case 0x7d: /* FRSQRTE */
7762                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
7763                 break;
7764             default:
7765                 g_assert_not_reached();
7766             }
7767             write_vec_element(s, tcg_res, rd, pass, MO_64);
7768         }
7769         if (is_scalar) {
7770             clear_vec_high(s, rd);
7771         }
7772
7773         tcg_temp_free_i64(tcg_res);
7774         tcg_temp_free_i64(tcg_op);
7775     } else {
7776         TCGv_i32 tcg_op = tcg_temp_new_i32();
7777         TCGv_i32 tcg_res = tcg_temp_new_i32();
7778         int pass, maxpasses;
7779
7780         if (is_scalar) {
7781             maxpasses = 1;
7782         } else {
7783             maxpasses = is_q ? 4 : 2;
7784         }
7785
7786         for (pass = 0; pass < maxpasses; pass++) {
7787             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7788
7789             switch (opcode) {
7790             case 0x3c: /* URECPE */
7791                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
7792                 break;
7793             case 0x3d: /* FRECPE */
7794                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
7795                 break;
7796             case 0x3f: /* FRECPX */
7797                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
7798                 break;
7799             case 0x7d: /* FRSQRTE */
7800                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
7801                 break;
7802             default:
7803                 g_assert_not_reached();
7804             }
7805
7806             if (is_scalar) {
7807                 write_fp_sreg(s, rd, tcg_res);
7808             } else {
7809                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
7810             }
7811         }
7812         tcg_temp_free_i32(tcg_res);
7813         tcg_temp_free_i32(tcg_op);
7814         if (!is_q && !is_scalar) {
7815             clear_vec_high(s, rd);
7816         }
7817     }
7818     tcg_temp_free_ptr(fpst);
7819 }
7820
7821 static void handle_2misc_narrow(DisasContext *s, bool scalar,
7822                                 int opcode, bool u, bool is_q,
7823                                 int size, int rn, int rd)
7824 {
7825     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
7826      * in the source becomes a size element in the destination).
7827      */
7828     int pass;
7829     TCGv_i32 tcg_res[2];
7830     int destelt = is_q ? 2 : 0;
7831     int passes = scalar ? 1 : 2;
7832
7833     if (scalar) {
7834         tcg_res[1] = tcg_const_i32(0);
7835     }
7836
7837     for (pass = 0; pass < passes; pass++) {
7838         TCGv_i64 tcg_op = tcg_temp_new_i64();
7839         NeonGenNarrowFn *genfn = NULL;
7840         NeonGenNarrowEnvFn *genenvfn = NULL;
7841
7842         if (scalar) {
7843             read_vec_element(s, tcg_op, rn, pass, size + 1);
7844         } else {
7845             read_vec_element(s, tcg_op, rn, pass, MO_64);
7846         }
7847         tcg_res[pass] = tcg_temp_new_i32();
7848
7849         switch (opcode) {
7850         case 0x12: /* XTN, SQXTUN */
7851         {
7852             static NeonGenNarrowFn * const xtnfns[3] = {
7853                 gen_helper_neon_narrow_u8,
7854                 gen_helper_neon_narrow_u16,
7855                 tcg_gen_extrl_i64_i32,
7856             };
7857             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
7858                 gen_helper_neon_unarrow_sat8,
7859                 gen_helper_neon_unarrow_sat16,
7860                 gen_helper_neon_unarrow_sat32,
7861             };
7862             if (u) {
7863                 genenvfn = sqxtunfns[size];
7864             } else {
7865                 genfn = xtnfns[size];
7866             }
7867             break;
7868         }
7869         case 0x14: /* SQXTN, UQXTN */
7870         {
7871             static NeonGenNarrowEnvFn * const fns[3][2] = {
7872                 { gen_helper_neon_narrow_sat_s8,
7873                   gen_helper_neon_narrow_sat_u8 },
7874                 { gen_helper_neon_narrow_sat_s16,
7875                   gen_helper_neon_narrow_sat_u16 },
7876                 { gen_helper_neon_narrow_sat_s32,
7877                   gen_helper_neon_narrow_sat_u32 },
7878             };
7879             genenvfn = fns[size][u];
7880             break;
7881         }
7882         case 0x16: /* FCVTN, FCVTN2 */
7883             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
7884             if (size == 2) {
7885                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
7886             } else {
7887                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
7888                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
7889                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
7890                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
7891                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
7892                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
7893                 tcg_temp_free_i32(tcg_lo);
7894                 tcg_temp_free_i32(tcg_hi);
7895             }
7896             break;
7897         case 0x56:  /* FCVTXN, FCVTXN2 */
7898             /* 64 bit to 32 bit float conversion
7899              * with von Neumann rounding (round to odd)
7900              */
7901             assert(size == 2);
7902             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
7903             break;
7904         default:
7905             g_assert_not_reached();
7906         }
7907
7908         if (genfn) {
7909             genfn(tcg_res[pass], tcg_op);
7910         } else if (genenvfn) {
7911             genenvfn(tcg_res[pass], cpu_env, tcg_op);
7912         }
7913
7914         tcg_temp_free_i64(tcg_op);
7915     }
7916
7917     for (pass = 0; pass < 2; pass++) {
7918         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
7919         tcg_temp_free_i32(tcg_res[pass]);
7920     }
7921     if (!is_q) {
7922         clear_vec_high(s, rd);
7923     }
7924 }
7925
7926 /* Remaining saturating accumulating ops */
7927 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
7928                                 bool is_q, int size, int rn, int rd)
7929 {
7930     bool is_double = (size == 3);
7931
7932     if (is_double) {
7933         TCGv_i64 tcg_rn = tcg_temp_new_i64();
7934         TCGv_i64 tcg_rd = tcg_temp_new_i64();
7935         int pass;
7936
7937         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
7938             read_vec_element(s, tcg_rn, rn, pass, MO_64);
7939             read_vec_element(s, tcg_rd, rd, pass, MO_64);
7940
7941             if (is_u) { /* USQADD */
7942                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7943             } else { /* SUQADD */
7944                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7945             }
7946             write_vec_element(s, tcg_rd, rd, pass, MO_64);
7947         }
7948         if (is_scalar) {
7949             clear_vec_high(s, rd);
7950         }
7951
7952         tcg_temp_free_i64(tcg_rd);
7953         tcg_temp_free_i64(tcg_rn);
7954     } else {
7955         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7956         TCGv_i32 tcg_rd = tcg_temp_new_i32();
7957         int pass, maxpasses;
7958
7959         if (is_scalar) {
7960             maxpasses = 1;
7961         } else {
7962             maxpasses = is_q ? 4 : 2;
7963         }
7964
7965         for (pass = 0; pass < maxpasses; pass++) {
7966             if (is_scalar) {
7967                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
7968                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
7969             } else {
7970                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
7971                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
7972             }
7973
7974             if (is_u) { /* USQADD */
7975                 switch (size) {
7976                 case 0:
7977                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7978                     break;
7979                 case 1:
7980                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7981                     break;
7982                 case 2:
7983                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7984                     break;
7985                 default:
7986                     g_assert_not_reached();
7987                 }
7988             } else { /* SUQADD */
7989                 switch (size) {
7990                 case 0:
7991                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7992                     break;
7993                 case 1:
7994                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7995                     break;
7996                 case 2:
7997                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
7998                     break;
7999                 default:
8000                     g_assert_not_reached();
8001                 }
8002             }
8003
8004             if (is_scalar) {
8005                 TCGv_i64 tcg_zero = tcg_const_i64(0);
8006                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
8007                 tcg_temp_free_i64(tcg_zero);
8008             }
8009             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8010         }
8011
8012         if (!is_q) {
8013             clear_vec_high(s, rd);
8014         }
8015
8016         tcg_temp_free_i32(tcg_rd);
8017         tcg_temp_free_i32(tcg_rn);
8018     }
8019 }
8020
8021 /* C3.6.12 AdvSIMD scalar two reg misc
8022  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8023  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8024  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8025  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8026  */
8027 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
8028 {
8029     int rd = extract32(insn, 0, 5);
8030     int rn = extract32(insn, 5, 5);
8031     int opcode = extract32(insn, 12, 5);
8032     int size = extract32(insn, 22, 2);
8033     bool u = extract32(insn, 29, 1);
8034     bool is_fcvt = false;
8035     int rmode;
8036     TCGv_i32 tcg_rmode;
8037     TCGv_ptr tcg_fpstatus;
8038
8039     switch (opcode) {
8040     case 0x3: /* USQADD / SUQADD*/
8041         if (!fp_access_check(s)) {
8042             return;
8043         }
8044         handle_2misc_satacc(s, true, u, false, size, rn, rd);
8045         return;
8046     case 0x7: /* SQABS / SQNEG */
8047         break;
8048     case 0xa: /* CMLT */
8049         if (u) {
8050             unallocated_encoding(s);
8051             return;
8052         }
8053         /* fall through */
8054     case 0x8: /* CMGT, CMGE */
8055     case 0x9: /* CMEQ, CMLE */
8056     case 0xb: /* ABS, NEG */
8057         if (size != 3) {
8058             unallocated_encoding(s);
8059             return;
8060         }
8061         break;
8062     case 0x12: /* SQXTUN */
8063         if (!u) {
8064             unallocated_encoding(s);
8065             return;
8066         }
8067         /* fall through */
8068     case 0x14: /* SQXTN, UQXTN */
8069         if (size == 3) {
8070             unallocated_encoding(s);
8071             return;
8072         }
8073         if (!fp_access_check(s)) {
8074             return;
8075         }
8076         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
8077         return;
8078     case 0xc ... 0xf:
8079     case 0x16 ... 0x1d:
8080     case 0x1f:
8081         /* Floating point: U, size[1] and opcode indicate operation;
8082          * size[0] indicates single or double precision.
8083          */
8084         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8085         size = extract32(size, 0, 1) ? 3 : 2;
8086         switch (opcode) {
8087         case 0x2c: /* FCMGT (zero) */
8088         case 0x2d: /* FCMEQ (zero) */
8089         case 0x2e: /* FCMLT (zero) */
8090         case 0x6c: /* FCMGE (zero) */
8091         case 0x6d: /* FCMLE (zero) */
8092             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
8093             return;
8094         case 0x1d: /* SCVTF */
8095         case 0x5d: /* UCVTF */
8096         {
8097             bool is_signed = (opcode == 0x1d);
8098             if (!fp_access_check(s)) {
8099                 return;
8100             }
8101             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8102             return;
8103         }
8104         case 0x3d: /* FRECPE */
8105         case 0x3f: /* FRECPX */
8106         case 0x7d: /* FRSQRTE */
8107             if (!fp_access_check(s)) {
8108                 return;
8109             }
8110             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8111             return;
8112         case 0x1a: /* FCVTNS */
8113         case 0x1b: /* FCVTMS */
8114         case 0x3a: /* FCVTPS */
8115         case 0x3b: /* FCVTZS */
8116         case 0x5a: /* FCVTNU */
8117         case 0x5b: /* FCVTMU */
8118         case 0x7a: /* FCVTPU */
8119         case 0x7b: /* FCVTZU */
8120             is_fcvt = true;
8121             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8122             break;
8123         case 0x1c: /* FCVTAS */
8124         case 0x5c: /* FCVTAU */
8125             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8126             is_fcvt = true;
8127             rmode = FPROUNDING_TIEAWAY;
8128             break;
8129         case 0x56: /* FCVTXN, FCVTXN2 */
8130             if (size == 2) {
8131                 unallocated_encoding(s);
8132                 return;
8133             }
8134             if (!fp_access_check(s)) {
8135                 return;
8136             }
8137             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8138             return;
8139         default:
8140             unallocated_encoding(s);
8141             return;
8142         }
8143         break;
8144     default:
8145         unallocated_encoding(s);
8146         return;
8147     }
8148
8149     if (!fp_access_check(s)) {
8150         return;
8151     }
8152
8153     if (is_fcvt) {
8154         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8155         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8156         tcg_fpstatus = get_fpstatus_ptr();
8157     } else {
8158         TCGV_UNUSED_I32(tcg_rmode);
8159         TCGV_UNUSED_PTR(tcg_fpstatus);
8160     }
8161
8162     if (size == 3) {
8163         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8164         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8165
8166         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8167         write_fp_dreg(s, rd, tcg_rd);
8168         tcg_temp_free_i64(tcg_rd);
8169         tcg_temp_free_i64(tcg_rn);
8170     } else {
8171         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8172         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8173
8174         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8175
8176         switch (opcode) {
8177         case 0x7: /* SQABS, SQNEG */
8178         {
8179             NeonGenOneOpEnvFn *genfn;
8180             static NeonGenOneOpEnvFn * const fns[3][2] = {
8181                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8182                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8183                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8184             };
8185             genfn = fns[size][u];
8186             genfn(tcg_rd, cpu_env, tcg_rn);
8187             break;
8188         }
8189         case 0x1a: /* FCVTNS */
8190         case 0x1b: /* FCVTMS */
8191         case 0x1c: /* FCVTAS */
8192         case 0x3a: /* FCVTPS */
8193         case 0x3b: /* FCVTZS */
8194         {
8195             TCGv_i32 tcg_shift = tcg_const_i32(0);
8196             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8197             tcg_temp_free_i32(tcg_shift);
8198             break;
8199         }
8200         case 0x5a: /* FCVTNU */
8201         case 0x5b: /* FCVTMU */
8202         case 0x5c: /* FCVTAU */
8203         case 0x7a: /* FCVTPU */
8204         case 0x7b: /* FCVTZU */
8205         {
8206             TCGv_i32 tcg_shift = tcg_const_i32(0);
8207             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8208             tcg_temp_free_i32(tcg_shift);
8209             break;
8210         }
8211         default:
8212             g_assert_not_reached();
8213         }
8214
8215         write_fp_sreg(s, rd, tcg_rd);
8216         tcg_temp_free_i32(tcg_rd);
8217         tcg_temp_free_i32(tcg_rn);
8218     }
8219
8220     if (is_fcvt) {
8221         gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
8222         tcg_temp_free_i32(tcg_rmode);
8223         tcg_temp_free_ptr(tcg_fpstatus);
8224     }
8225 }
8226
8227 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
8228 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
8229                                  int immh, int immb, int opcode, int rn, int rd)
8230 {
8231     int size = 32 - clz32(immh) - 1;
8232     int immhb = immh << 3 | immb;
8233     int shift = 2 * (8 << size) - immhb;
8234     bool accumulate = false;
8235     bool round = false;
8236     bool insert = false;
8237     int dsize = is_q ? 128 : 64;
8238     int esize = 8 << size;
8239     int elements = dsize/esize;
8240     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
8241     TCGv_i64 tcg_rn = new_tmp_a64(s);
8242     TCGv_i64 tcg_rd = new_tmp_a64(s);
8243     TCGv_i64 tcg_round;
8244     int i;
8245
8246     if (extract32(immh, 3, 1) && !is_q) {
8247         unallocated_encoding(s);
8248         return;
8249     }
8250
8251     if (size > 3 && !is_q) {
8252         unallocated_encoding(s);
8253         return;
8254     }
8255
8256     if (!fp_access_check(s)) {
8257         return;
8258     }
8259
8260     switch (opcode) {
8261     case 0x02: /* SSRA / USRA (accumulate) */
8262         accumulate = true;
8263         break;
8264     case 0x04: /* SRSHR / URSHR (rounding) */
8265         round = true;
8266         break;
8267     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8268         accumulate = round = true;
8269         break;
8270     case 0x08: /* SRI */
8271         insert = true;
8272         break;
8273     }
8274
8275     if (round) {
8276         uint64_t round_const = 1ULL << (shift - 1);
8277         tcg_round = tcg_const_i64(round_const);
8278     } else {
8279         TCGV_UNUSED_I64(tcg_round);
8280     }
8281
8282     for (i = 0; i < elements; i++) {
8283         read_vec_element(s, tcg_rn, rn, i, memop);
8284         if (accumulate || insert) {
8285             read_vec_element(s, tcg_rd, rd, i, memop);
8286         }
8287
8288         if (insert) {
8289             handle_shri_with_ins(tcg_rd, tcg_rn, size, shift);
8290         } else {
8291             handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8292                                     accumulate, is_u, size, shift);
8293         }
8294
8295         write_vec_element(s, tcg_rd, rd, i, size);
8296     }
8297
8298     if (!is_q) {
8299         clear_vec_high(s, rd);
8300     }
8301
8302     if (round) {
8303         tcg_temp_free_i64(tcg_round);
8304     }
8305 }
8306
8307 /* SHL/SLI - Vector shift left */
8308 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
8309                                 int immh, int immb, int opcode, int rn, int rd)
8310 {
8311     int size = 32 - clz32(immh) - 1;
8312     int immhb = immh << 3 | immb;
8313     int shift = immhb - (8 << size);
8314     int dsize = is_q ? 128 : 64;
8315     int esize = 8 << size;
8316     int elements = dsize/esize;
8317     TCGv_i64 tcg_rn = new_tmp_a64(s);
8318     TCGv_i64 tcg_rd = new_tmp_a64(s);
8319     int i;
8320
8321     if (extract32(immh, 3, 1) && !is_q) {
8322         unallocated_encoding(s);
8323         return;
8324     }
8325
8326     if (size > 3 && !is_q) {
8327         unallocated_encoding(s);
8328         return;
8329     }
8330
8331     if (!fp_access_check(s)) {
8332         return;
8333     }
8334
8335     for (i = 0; i < elements; i++) {
8336         read_vec_element(s, tcg_rn, rn, i, size);
8337         if (insert) {
8338             read_vec_element(s, tcg_rd, rd, i, size);
8339         }
8340
8341         handle_shli_with_ins(tcg_rd, tcg_rn, insert, shift);
8342
8343         write_vec_element(s, tcg_rd, rd, i, size);
8344     }
8345
8346     if (!is_q) {
8347         clear_vec_high(s, rd);
8348     }
8349 }
8350
8351 /* USHLL/SHLL - Vector shift left with widening */
8352 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
8353                                  int immh, int immb, int opcode, int rn, int rd)
8354 {
8355     int size = 32 - clz32(immh) - 1;
8356     int immhb = immh << 3 | immb;
8357     int shift = immhb - (8 << size);
8358     int dsize = 64;
8359     int esize = 8 << size;
8360     int elements = dsize/esize;
8361     TCGv_i64 tcg_rn = new_tmp_a64(s);
8362     TCGv_i64 tcg_rd = new_tmp_a64(s);
8363     int i;
8364
8365     if (size >= 3) {
8366         unallocated_encoding(s);
8367         return;
8368     }
8369
8370     if (!fp_access_check(s)) {
8371         return;
8372     }
8373
8374     /* For the LL variants the store is larger than the load,
8375      * so if rd == rn we would overwrite parts of our input.
8376      * So load everything right now and use shifts in the main loop.
8377      */
8378     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
8379
8380     for (i = 0; i < elements; i++) {
8381         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
8382         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
8383         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
8384         write_vec_element(s, tcg_rd, rd, i, size + 1);
8385     }
8386 }
8387
8388 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
8389 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
8390                                  int immh, int immb, int opcode, int rn, int rd)
8391 {
8392     int immhb = immh << 3 | immb;
8393     int size = 32 - clz32(immh) - 1;
8394     int dsize = 64;
8395     int esize = 8 << size;
8396     int elements = dsize/esize;
8397     int shift = (2 * esize) - immhb;
8398     bool round = extract32(opcode, 0, 1);
8399     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
8400     TCGv_i64 tcg_round;
8401     int i;
8402
8403     if (extract32(immh, 3, 1)) {
8404         unallocated_encoding(s);
8405         return;
8406     }
8407
8408     if (!fp_access_check(s)) {
8409         return;
8410     }
8411
8412     tcg_rn = tcg_temp_new_i64();
8413     tcg_rd = tcg_temp_new_i64();
8414     tcg_final = tcg_temp_new_i64();
8415     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
8416
8417     if (round) {
8418         uint64_t round_const = 1ULL << (shift - 1);
8419         tcg_round = tcg_const_i64(round_const);
8420     } else {
8421         TCGV_UNUSED_I64(tcg_round);
8422     }
8423
8424     for (i = 0; i < elements; i++) {
8425         read_vec_element(s, tcg_rn, rn, i, size+1);
8426         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
8427                                 false, true, size+1, shift);
8428
8429         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
8430     }
8431
8432     if (!is_q) {
8433         clear_vec_high(s, rd);
8434         write_vec_element(s, tcg_final, rd, 0, MO_64);
8435     } else {
8436         write_vec_element(s, tcg_final, rd, 1, MO_64);
8437     }
8438
8439     if (round) {
8440         tcg_temp_free_i64(tcg_round);
8441     }
8442     tcg_temp_free_i64(tcg_rn);
8443     tcg_temp_free_i64(tcg_rd);
8444     tcg_temp_free_i64(tcg_final);
8445     return;
8446 }
8447
8448
8449 /* C3.6.14 AdvSIMD shift by immediate
8450  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
8451  * +---+---+---+-------------+------+------+--------+---+------+------+
8452  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
8453  * +---+---+---+-------------+------+------+--------+---+------+------+
8454  */
8455 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
8456 {
8457     int rd = extract32(insn, 0, 5);
8458     int rn = extract32(insn, 5, 5);
8459     int opcode = extract32(insn, 11, 5);
8460     int immb = extract32(insn, 16, 3);
8461     int immh = extract32(insn, 19, 4);
8462     bool is_u = extract32(insn, 29, 1);
8463     bool is_q = extract32(insn, 30, 1);
8464
8465     switch (opcode) {
8466     case 0x08: /* SRI */
8467         if (!is_u) {
8468             unallocated_encoding(s);
8469             return;
8470         }
8471         /* fall through */
8472     case 0x00: /* SSHR / USHR */
8473     case 0x02: /* SSRA / USRA (accumulate) */
8474     case 0x04: /* SRSHR / URSHR (rounding) */
8475     case 0x06: /* SRSRA / URSRA (accum + rounding) */
8476         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
8477         break;
8478     case 0x0a: /* SHL / SLI */
8479         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8480         break;
8481     case 0x10: /* SHRN */
8482     case 0x11: /* RSHRN / SQRSHRUN */
8483         if (is_u) {
8484             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
8485                                    opcode, rn, rd);
8486         } else {
8487             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
8488         }
8489         break;
8490     case 0x12: /* SQSHRN / UQSHRN */
8491     case 0x13: /* SQRSHRN / UQRSHRN */
8492         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
8493                                opcode, rn, rd);
8494         break;
8495     case 0x14: /* SSHLL / USHLL */
8496         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
8497         break;
8498     case 0x1c: /* SCVTF / UCVTF */
8499         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
8500                                      opcode, rn, rd);
8501         break;
8502     case 0xc: /* SQSHLU */
8503         if (!is_u) {
8504             unallocated_encoding(s);
8505             return;
8506         }
8507         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
8508         break;
8509     case 0xe: /* SQSHL, UQSHL */
8510         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
8511         break;
8512     case 0x1f: /* FCVTZS/ FCVTZU */
8513         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
8514         return;
8515     default:
8516         unallocated_encoding(s);
8517         return;
8518     }
8519 }
8520
8521 /* Generate code to do a "long" addition or subtraction, ie one done in
8522  * TCGv_i64 on vector lanes twice the width specified by size.
8523  */
8524 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
8525                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
8526 {
8527     static NeonGenTwo64OpFn * const fns[3][2] = {
8528         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
8529         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
8530         { tcg_gen_add_i64, tcg_gen_sub_i64 },
8531     };
8532     NeonGenTwo64OpFn *genfn;
8533     assert(size < 3);
8534
8535     genfn = fns[size][is_sub];
8536     genfn(tcg_res, tcg_op1, tcg_op2);
8537 }
8538
8539 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
8540                                 int opcode, int rd, int rn, int rm)
8541 {
8542     /* 3-reg-different widening insns: 64 x 64 -> 128 */
8543     TCGv_i64 tcg_res[2];
8544     int pass, accop;
8545
8546     tcg_res[0] = tcg_temp_new_i64();
8547     tcg_res[1] = tcg_temp_new_i64();
8548
8549     /* Does this op do an adding accumulate, a subtracting accumulate,
8550      * or no accumulate at all?
8551      */
8552     switch (opcode) {
8553     case 5:
8554     case 8:
8555     case 9:
8556         accop = 1;
8557         break;
8558     case 10:
8559     case 11:
8560         accop = -1;
8561         break;
8562     default:
8563         accop = 0;
8564         break;
8565     }
8566
8567     if (accop != 0) {
8568         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
8569         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
8570     }
8571
8572     /* size == 2 means two 32x32->64 operations; this is worth special
8573      * casing because we can generally handle it inline.
8574      */
8575     if (size == 2) {
8576         for (pass = 0; pass < 2; pass++) {
8577             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8578             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8579             TCGv_i64 tcg_passres;
8580             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
8581
8582             int elt = pass + is_q * 2;
8583
8584             read_vec_element(s, tcg_op1, rn, elt, memop);
8585             read_vec_element(s, tcg_op2, rm, elt, memop);
8586
8587             if (accop == 0) {
8588                 tcg_passres = tcg_res[pass];
8589             } else {
8590                 tcg_passres = tcg_temp_new_i64();
8591             }
8592
8593             switch (opcode) {
8594             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8595                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
8596                 break;
8597             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8598                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
8599                 break;
8600             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8601             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8602             {
8603                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
8604                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
8605
8606                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
8607                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
8608                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
8609                                     tcg_passres,
8610                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
8611                 tcg_temp_free_i64(tcg_tmp1);
8612                 tcg_temp_free_i64(tcg_tmp2);
8613                 break;
8614             }
8615             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8616             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8617             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8618                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8619                 break;
8620             case 9: /* SQDMLAL, SQDMLAL2 */
8621             case 11: /* SQDMLSL, SQDMLSL2 */
8622             case 13: /* SQDMULL, SQDMULL2 */
8623                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
8624                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
8625                                                   tcg_passres, tcg_passres);
8626                 break;
8627             default:
8628                 g_assert_not_reached();
8629             }
8630
8631             if (opcode == 9 || opcode == 11) {
8632                 /* saturating accumulate ops */
8633                 if (accop < 0) {
8634                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
8635                 }
8636                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
8637                                                   tcg_res[pass], tcg_passres);
8638             } else if (accop > 0) {
8639                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8640             } else if (accop < 0) {
8641                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
8642             }
8643
8644             if (accop != 0) {
8645                 tcg_temp_free_i64(tcg_passres);
8646             }
8647
8648             tcg_temp_free_i64(tcg_op1);
8649             tcg_temp_free_i64(tcg_op2);
8650         }
8651     } else {
8652         /* size 0 or 1, generally helper functions */
8653         for (pass = 0; pass < 2; pass++) {
8654             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8655             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8656             TCGv_i64 tcg_passres;
8657             int elt = pass + is_q * 2;
8658
8659             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
8660             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
8661
8662             if (accop == 0) {
8663                 tcg_passres = tcg_res[pass];
8664             } else {
8665                 tcg_passres = tcg_temp_new_i64();
8666             }
8667
8668             switch (opcode) {
8669             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8670             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8671             {
8672                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
8673                 static NeonGenWidenFn * const widenfns[2][2] = {
8674                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8675                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8676                 };
8677                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
8678
8679                 widenfn(tcg_op2_64, tcg_op2);
8680                 widenfn(tcg_passres, tcg_op1);
8681                 gen_neon_addl(size, (opcode == 2), tcg_passres,
8682                               tcg_passres, tcg_op2_64);
8683                 tcg_temp_free_i64(tcg_op2_64);
8684                 break;
8685             }
8686             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8687             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8688                 if (size == 0) {
8689                     if (is_u) {
8690                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
8691                     } else {
8692                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
8693                     }
8694                 } else {
8695                     if (is_u) {
8696                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
8697                     } else {
8698                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
8699                     }
8700                 }
8701                 break;
8702             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8703             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8704             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
8705                 if (size == 0) {
8706                     if (is_u) {
8707                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
8708                     } else {
8709                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
8710                     }
8711                 } else {
8712                     if (is_u) {
8713                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
8714                     } else {
8715                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8716                     }
8717                 }
8718                 break;
8719             case 9: /* SQDMLAL, SQDMLAL2 */
8720             case 11: /* SQDMLSL, SQDMLSL2 */
8721             case 13: /* SQDMULL, SQDMULL2 */
8722                 assert(size == 1);
8723                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
8724                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
8725                                                   tcg_passres, tcg_passres);
8726                 break;
8727             case 14: /* PMULL */
8728                 assert(size == 0);
8729                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
8730                 break;
8731             default:
8732                 g_assert_not_reached();
8733             }
8734             tcg_temp_free_i32(tcg_op1);
8735             tcg_temp_free_i32(tcg_op2);
8736
8737             if (accop != 0) {
8738                 if (opcode == 9 || opcode == 11) {
8739                     /* saturating accumulate ops */
8740                     if (accop < 0) {
8741                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
8742                     }
8743                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
8744                                                       tcg_res[pass],
8745                                                       tcg_passres);
8746                 } else {
8747                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
8748                                   tcg_res[pass], tcg_passres);
8749                 }
8750                 tcg_temp_free_i64(tcg_passres);
8751             }
8752         }
8753     }
8754
8755     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
8756     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
8757     tcg_temp_free_i64(tcg_res[0]);
8758     tcg_temp_free_i64(tcg_res[1]);
8759 }
8760
8761 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
8762                             int opcode, int rd, int rn, int rm)
8763 {
8764     TCGv_i64 tcg_res[2];
8765     int part = is_q ? 2 : 0;
8766     int pass;
8767
8768     for (pass = 0; pass < 2; pass++) {
8769         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8770         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8771         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
8772         static NeonGenWidenFn * const widenfns[3][2] = {
8773             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
8774             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
8775             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
8776         };
8777         NeonGenWidenFn *widenfn = widenfns[size][is_u];
8778
8779         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8780         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
8781         widenfn(tcg_op2_wide, tcg_op2);
8782         tcg_temp_free_i32(tcg_op2);
8783         tcg_res[pass] = tcg_temp_new_i64();
8784         gen_neon_addl(size, (opcode == 3),
8785                       tcg_res[pass], tcg_op1, tcg_op2_wide);
8786         tcg_temp_free_i64(tcg_op1);
8787         tcg_temp_free_i64(tcg_op2_wide);
8788     }
8789
8790     for (pass = 0; pass < 2; pass++) {
8791         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
8792         tcg_temp_free_i64(tcg_res[pass]);
8793     }
8794 }
8795
8796 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
8797 {
8798     tcg_gen_addi_i64(in, in, 1U << 31);
8799     tcg_gen_extrh_i64_i32(res, in);
8800 }
8801
8802 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
8803                                  int opcode, int rd, int rn, int rm)
8804 {
8805     TCGv_i32 tcg_res[2];
8806     int part = is_q ? 2 : 0;
8807     int pass;
8808
8809     for (pass = 0; pass < 2; pass++) {
8810         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8811         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8812         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
8813         static NeonGenNarrowFn * const narrowfns[3][2] = {
8814             { gen_helper_neon_narrow_high_u8,
8815               gen_helper_neon_narrow_round_high_u8 },
8816             { gen_helper_neon_narrow_high_u16,
8817               gen_helper_neon_narrow_round_high_u16 },
8818             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
8819         };
8820         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
8821
8822         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8823         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8824
8825         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
8826
8827         tcg_temp_free_i64(tcg_op1);
8828         tcg_temp_free_i64(tcg_op2);
8829
8830         tcg_res[pass] = tcg_temp_new_i32();
8831         gennarrow(tcg_res[pass], tcg_wideres);
8832         tcg_temp_free_i64(tcg_wideres);
8833     }
8834
8835     for (pass = 0; pass < 2; pass++) {
8836         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
8837         tcg_temp_free_i32(tcg_res[pass]);
8838     }
8839     if (!is_q) {
8840         clear_vec_high(s, rd);
8841     }
8842 }
8843
8844 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
8845 {
8846     /* PMULL of 64 x 64 -> 128 is an odd special case because it
8847      * is the only three-reg-diff instruction which produces a
8848      * 128-bit wide result from a single operation. However since
8849      * it's possible to calculate the two halves more or less
8850      * separately we just use two helper calls.
8851      */
8852     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8853     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8854     TCGv_i64 tcg_res = tcg_temp_new_i64();
8855
8856     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
8857     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
8858     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
8859     write_vec_element(s, tcg_res, rd, 0, MO_64);
8860     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
8861     write_vec_element(s, tcg_res, rd, 1, MO_64);
8862
8863     tcg_temp_free_i64(tcg_op1);
8864     tcg_temp_free_i64(tcg_op2);
8865     tcg_temp_free_i64(tcg_res);
8866 }
8867
8868 /* C3.6.15 AdvSIMD three different
8869  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
8870  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8871  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
8872  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
8873  */
8874 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
8875 {
8876     /* Instructions in this group fall into three basic classes
8877      * (in each case with the operation working on each element in
8878      * the input vectors):
8879      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
8880      *     128 bit input)
8881      * (2) wide 64 x 128 -> 128
8882      * (3) narrowing 128 x 128 -> 64
8883      * Here we do initial decode, catch unallocated cases and
8884      * dispatch to separate functions for each class.
8885      */
8886     int is_q = extract32(insn, 30, 1);
8887     int is_u = extract32(insn, 29, 1);
8888     int size = extract32(insn, 22, 2);
8889     int opcode = extract32(insn, 12, 4);
8890     int rm = extract32(insn, 16, 5);
8891     int rn = extract32(insn, 5, 5);
8892     int rd = extract32(insn, 0, 5);
8893
8894     switch (opcode) {
8895     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
8896     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
8897         /* 64 x 128 -> 128 */
8898         if (size == 3) {
8899             unallocated_encoding(s);
8900             return;
8901         }
8902         if (!fp_access_check(s)) {
8903             return;
8904         }
8905         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
8906         break;
8907     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
8908     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
8909         /* 128 x 128 -> 64 */
8910         if (size == 3) {
8911             unallocated_encoding(s);
8912             return;
8913         }
8914         if (!fp_access_check(s)) {
8915             return;
8916         }
8917         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
8918         break;
8919     case 14: /* PMULL, PMULL2 */
8920         if (is_u || size == 1 || size == 2) {
8921             unallocated_encoding(s);
8922             return;
8923         }
8924         if (size == 3) {
8925             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
8926                 unallocated_encoding(s);
8927                 return;
8928             }
8929             if (!fp_access_check(s)) {
8930                 return;
8931             }
8932             handle_pmull_64(s, is_q, rd, rn, rm);
8933             return;
8934         }
8935         goto is_widening;
8936     case 9: /* SQDMLAL, SQDMLAL2 */
8937     case 11: /* SQDMLSL, SQDMLSL2 */
8938     case 13: /* SQDMULL, SQDMULL2 */
8939         if (is_u || size == 0) {
8940             unallocated_encoding(s);
8941             return;
8942         }
8943         /* fall through */
8944     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
8945     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
8946     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
8947     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
8948     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
8949     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
8950     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
8951         /* 64 x 64 -> 128 */
8952         if (size == 3) {
8953             unallocated_encoding(s);
8954             return;
8955         }
8956     is_widening:
8957         if (!fp_access_check(s)) {
8958             return;
8959         }
8960
8961         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
8962         break;
8963     default:
8964         /* opcode 15 not allocated */
8965         unallocated_encoding(s);
8966         break;
8967     }
8968 }
8969
8970 /* Logic op (opcode == 3) subgroup of C3.6.16. */
8971 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
8972 {
8973     int rd = extract32(insn, 0, 5);
8974     int rn = extract32(insn, 5, 5);
8975     int rm = extract32(insn, 16, 5);
8976     int size = extract32(insn, 22, 2);
8977     bool is_u = extract32(insn, 29, 1);
8978     bool is_q = extract32(insn, 30, 1);
8979     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
8980     int pass;
8981
8982     if (!fp_access_check(s)) {
8983         return;
8984     }
8985
8986     tcg_op1 = tcg_temp_new_i64();
8987     tcg_op2 = tcg_temp_new_i64();
8988     tcg_res[0] = tcg_temp_new_i64();
8989     tcg_res[1] = tcg_temp_new_i64();
8990
8991     for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
8992         read_vec_element(s, tcg_op1, rn, pass, MO_64);
8993         read_vec_element(s, tcg_op2, rm, pass, MO_64);
8994
8995         if (!is_u) {
8996             switch (size) {
8997             case 0: /* AND */
8998                 tcg_gen_and_i64(tcg_res[pass], tcg_op1, tcg_op2);
8999                 break;
9000             case 1: /* BIC */
9001                 tcg_gen_andc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9002                 break;
9003             case 2: /* ORR */
9004                 tcg_gen_or_i64(tcg_res[pass], tcg_op1, tcg_op2);
9005                 break;
9006             case 3: /* ORN */
9007                 tcg_gen_orc_i64(tcg_res[pass], tcg_op1, tcg_op2);
9008                 break;
9009             }
9010         } else {
9011             if (size != 0) {
9012                 /* B* ops need res loaded to operate on */
9013                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9014             }
9015
9016             switch (size) {
9017             case 0: /* EOR */
9018                 tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
9019                 break;
9020             case 1: /* BSL bitwise select */
9021                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_op2);
9022                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9023                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op1);
9024                 break;
9025             case 2: /* BIT, bitwise insert if true */
9026                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9027                 tcg_gen_and_i64(tcg_op1, tcg_op1, tcg_op2);
9028                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9029                 break;
9030             case 3: /* BIF, bitwise insert if false */
9031                 tcg_gen_xor_i64(tcg_op1, tcg_op1, tcg_res[pass]);
9032                 tcg_gen_andc_i64(tcg_op1, tcg_op1, tcg_op2);
9033                 tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9034                 break;
9035             }
9036         }
9037     }
9038
9039     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
9040     if (!is_q) {
9041         tcg_gen_movi_i64(tcg_res[1], 0);
9042     }
9043     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
9044
9045     tcg_temp_free_i64(tcg_op1);
9046     tcg_temp_free_i64(tcg_op2);
9047     tcg_temp_free_i64(tcg_res[0]);
9048     tcg_temp_free_i64(tcg_res[1]);
9049 }
9050
9051 /* Helper functions for 32 bit comparisons */
9052 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9053 {
9054     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
9055 }
9056
9057 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9058 {
9059     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
9060 }
9061
9062 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9063 {
9064     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
9065 }
9066
9067 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9068 {
9069     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
9070 }
9071
9072 /* Pairwise op subgroup of C3.6.16.
9073  *
9074  * This is called directly or via the handle_3same_float for float pairwise
9075  * operations where the opcode and size are calculated differently.
9076  */
9077 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9078                                    int size, int rn, int rm, int rd)
9079 {
9080     TCGv_ptr fpst;
9081     int pass;
9082
9083     /* Floating point operations need fpst */
9084     if (opcode >= 0x58) {
9085         fpst = get_fpstatus_ptr();
9086     } else {
9087         TCGV_UNUSED_PTR(fpst);
9088     }
9089
9090     if (!fp_access_check(s)) {
9091         return;
9092     }
9093
9094     /* These operations work on the concatenated rm:rn, with each pair of
9095      * adjacent elements being operated on to produce an element in the result.
9096      */
9097     if (size == 3) {
9098         TCGv_i64 tcg_res[2];
9099
9100         for (pass = 0; pass < 2; pass++) {
9101             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9102             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9103             int passreg = (pass == 0) ? rn : rm;
9104
9105             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9106             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9107             tcg_res[pass] = tcg_temp_new_i64();
9108
9109             switch (opcode) {
9110             case 0x17: /* ADDP */
9111                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9112                 break;
9113             case 0x58: /* FMAXNMP */
9114                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9115                 break;
9116             case 0x5a: /* FADDP */
9117                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9118                 break;
9119             case 0x5e: /* FMAXP */
9120                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9121                 break;
9122             case 0x78: /* FMINNMP */
9123                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9124                 break;
9125             case 0x7e: /* FMINP */
9126                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9127                 break;
9128             default:
9129                 g_assert_not_reached();
9130             }
9131
9132             tcg_temp_free_i64(tcg_op1);
9133             tcg_temp_free_i64(tcg_op2);
9134         }
9135
9136         for (pass = 0; pass < 2; pass++) {
9137             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9138             tcg_temp_free_i64(tcg_res[pass]);
9139         }
9140     } else {
9141         int maxpass = is_q ? 4 : 2;
9142         TCGv_i32 tcg_res[4];
9143
9144         for (pass = 0; pass < maxpass; pass++) {
9145             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9146             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9147             NeonGenTwoOpFn *genfn = NULL;
9148             int passreg = pass < (maxpass / 2) ? rn : rm;
9149             int passelt = (is_q && (pass & 1)) ? 2 : 0;
9150
9151             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
9152             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
9153             tcg_res[pass] = tcg_temp_new_i32();
9154
9155             switch (opcode) {
9156             case 0x17: /* ADDP */
9157             {
9158                 static NeonGenTwoOpFn * const fns[3] = {
9159                     gen_helper_neon_padd_u8,
9160                     gen_helper_neon_padd_u16,
9161                     tcg_gen_add_i32,
9162                 };
9163                 genfn = fns[size];
9164                 break;
9165             }
9166             case 0x14: /* SMAXP, UMAXP */
9167             {
9168                 static NeonGenTwoOpFn * const fns[3][2] = {
9169                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
9170                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
9171                     { gen_max_s32, gen_max_u32 },
9172                 };
9173                 genfn = fns[size][u];
9174                 break;
9175             }
9176             case 0x15: /* SMINP, UMINP */
9177             {
9178                 static NeonGenTwoOpFn * const fns[3][2] = {
9179                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
9180                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
9181                     { gen_min_s32, gen_min_u32 },
9182                 };
9183                 genfn = fns[size][u];
9184                 break;
9185             }
9186             /* The FP operations are all on single floats (32 bit) */
9187             case 0x58: /* FMAXNMP */
9188                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9189                 break;
9190             case 0x5a: /* FADDP */
9191                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9192                 break;
9193             case 0x5e: /* FMAXP */
9194                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9195                 break;
9196             case 0x78: /* FMINNMP */
9197                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9198                 break;
9199             case 0x7e: /* FMINP */
9200                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9201                 break;
9202             default:
9203                 g_assert_not_reached();
9204             }
9205
9206             /* FP ops called directly, otherwise call now */
9207             if (genfn) {
9208                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
9209             }
9210
9211             tcg_temp_free_i32(tcg_op1);
9212             tcg_temp_free_i32(tcg_op2);
9213         }
9214
9215         for (pass = 0; pass < maxpass; pass++) {
9216             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9217             tcg_temp_free_i32(tcg_res[pass]);
9218         }
9219         if (!is_q) {
9220             clear_vec_high(s, rd);
9221         }
9222     }
9223
9224     if (!TCGV_IS_UNUSED_PTR(fpst)) {
9225         tcg_temp_free_ptr(fpst);
9226     }
9227 }
9228
9229 /* Floating point op subgroup of C3.6.16. */
9230 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
9231 {
9232     /* For floating point ops, the U, size[1] and opcode bits
9233      * together indicate the operation. size[0] indicates single
9234      * or double.
9235      */
9236     int fpopcode = extract32(insn, 11, 5)
9237         | (extract32(insn, 23, 1) << 5)
9238         | (extract32(insn, 29, 1) << 6);
9239     int is_q = extract32(insn, 30, 1);
9240     int size = extract32(insn, 22, 1);
9241     int rm = extract32(insn, 16, 5);
9242     int rn = extract32(insn, 5, 5);
9243     int rd = extract32(insn, 0, 5);
9244
9245     int datasize = is_q ? 128 : 64;
9246     int esize = 32 << size;
9247     int elements = datasize / esize;
9248
9249     if (size == 1 && !is_q) {
9250         unallocated_encoding(s);
9251         return;
9252     }
9253
9254     switch (fpopcode) {
9255     case 0x58: /* FMAXNMP */
9256     case 0x5a: /* FADDP */
9257     case 0x5e: /* FMAXP */
9258     case 0x78: /* FMINNMP */
9259     case 0x7e: /* FMINP */
9260         if (size && !is_q) {
9261             unallocated_encoding(s);
9262             return;
9263         }
9264         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
9265                                rn, rm, rd);
9266         return;
9267     case 0x1b: /* FMULX */
9268     case 0x1f: /* FRECPS */
9269     case 0x3f: /* FRSQRTS */
9270     case 0x5d: /* FACGE */
9271     case 0x7d: /* FACGT */
9272     case 0x19: /* FMLA */
9273     case 0x39: /* FMLS */
9274     case 0x18: /* FMAXNM */
9275     case 0x1a: /* FADD */
9276     case 0x1c: /* FCMEQ */
9277     case 0x1e: /* FMAX */
9278     case 0x38: /* FMINNM */
9279     case 0x3a: /* FSUB */
9280     case 0x3e: /* FMIN */
9281     case 0x5b: /* FMUL */
9282     case 0x5c: /* FCMGE */
9283     case 0x5f: /* FDIV */
9284     case 0x7a: /* FABD */
9285     case 0x7c: /* FCMGT */
9286         if (!fp_access_check(s)) {
9287             return;
9288         }
9289
9290         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
9291         return;
9292     default:
9293         unallocated_encoding(s);
9294         return;
9295     }
9296 }
9297
9298 /* Integer op subgroup of C3.6.16. */
9299 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
9300 {
9301     int is_q = extract32(insn, 30, 1);
9302     int u = extract32(insn, 29, 1);
9303     int size = extract32(insn, 22, 2);
9304     int opcode = extract32(insn, 11, 5);
9305     int rm = extract32(insn, 16, 5);
9306     int rn = extract32(insn, 5, 5);
9307     int rd = extract32(insn, 0, 5);
9308     int pass;
9309
9310     switch (opcode) {
9311     case 0x13: /* MUL, PMUL */
9312         if (u && size != 0) {
9313             unallocated_encoding(s);
9314             return;
9315         }
9316         /* fall through */
9317     case 0x0: /* SHADD, UHADD */
9318     case 0x2: /* SRHADD, URHADD */
9319     case 0x4: /* SHSUB, UHSUB */
9320     case 0xc: /* SMAX, UMAX */
9321     case 0xd: /* SMIN, UMIN */
9322     case 0xe: /* SABD, UABD */
9323     case 0xf: /* SABA, UABA */
9324     case 0x12: /* MLA, MLS */
9325         if (size == 3) {
9326             unallocated_encoding(s);
9327             return;
9328         }
9329         break;
9330     case 0x16: /* SQDMULH, SQRDMULH */
9331         if (size == 0 || size == 3) {
9332             unallocated_encoding(s);
9333             return;
9334         }
9335         break;
9336     default:
9337         if (size == 3 && !is_q) {
9338             unallocated_encoding(s);
9339             return;
9340         }
9341         break;
9342     }
9343
9344     if (!fp_access_check(s)) {
9345         return;
9346     }
9347
9348     if (size == 3) {
9349         assert(is_q);
9350         for (pass = 0; pass < 2; pass++) {
9351             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9352             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9353             TCGv_i64 tcg_res = tcg_temp_new_i64();
9354
9355             read_vec_element(s, tcg_op1, rn, pass, MO_64);
9356             read_vec_element(s, tcg_op2, rm, pass, MO_64);
9357
9358             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
9359
9360             write_vec_element(s, tcg_res, rd, pass, MO_64);
9361
9362             tcg_temp_free_i64(tcg_res);
9363             tcg_temp_free_i64(tcg_op1);
9364             tcg_temp_free_i64(tcg_op2);
9365         }
9366     } else {
9367         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
9368             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9369             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9370             TCGv_i32 tcg_res = tcg_temp_new_i32();
9371             NeonGenTwoOpFn *genfn = NULL;
9372             NeonGenTwoOpEnvFn *genenvfn = NULL;
9373
9374             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
9375             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
9376
9377             switch (opcode) {
9378             case 0x0: /* SHADD, UHADD */
9379             {
9380                 static NeonGenTwoOpFn * const fns[3][2] = {
9381                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
9382                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
9383                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
9384                 };
9385                 genfn = fns[size][u];
9386                 break;
9387             }
9388             case 0x1: /* SQADD, UQADD */
9389             {
9390                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9391                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
9392                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
9393                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
9394                 };
9395                 genenvfn = fns[size][u];
9396                 break;
9397             }
9398             case 0x2: /* SRHADD, URHADD */
9399             {
9400                 static NeonGenTwoOpFn * const fns[3][2] = {
9401                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
9402                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
9403                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
9404                 };
9405                 genfn = fns[size][u];
9406                 break;
9407             }
9408             case 0x4: /* SHSUB, UHSUB */
9409             {
9410                 static NeonGenTwoOpFn * const fns[3][2] = {
9411                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
9412                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
9413                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
9414                 };
9415                 genfn = fns[size][u];
9416                 break;
9417             }
9418             case 0x5: /* SQSUB, UQSUB */
9419             {
9420                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9421                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
9422                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
9423                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
9424                 };
9425                 genenvfn = fns[size][u];
9426                 break;
9427             }
9428             case 0x6: /* CMGT, CMHI */
9429             {
9430                 static NeonGenTwoOpFn * const fns[3][2] = {
9431                     { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_u8 },
9432                     { gen_helper_neon_cgt_s16, gen_helper_neon_cgt_u16 },
9433                     { gen_helper_neon_cgt_s32, gen_helper_neon_cgt_u32 },
9434                 };
9435                 genfn = fns[size][u];
9436                 break;
9437             }
9438             case 0x7: /* CMGE, CMHS */
9439             {
9440                 static NeonGenTwoOpFn * const fns[3][2] = {
9441                     { gen_helper_neon_cge_s8, gen_helper_neon_cge_u8 },
9442                     { gen_helper_neon_cge_s16, gen_helper_neon_cge_u16 },
9443                     { gen_helper_neon_cge_s32, gen_helper_neon_cge_u32 },
9444                 };
9445                 genfn = fns[size][u];
9446                 break;
9447             }
9448             case 0x8: /* SSHL, USHL */
9449             {
9450                 static NeonGenTwoOpFn * const fns[3][2] = {
9451                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
9452                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
9453                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
9454                 };
9455                 genfn = fns[size][u];
9456                 break;
9457             }
9458             case 0x9: /* SQSHL, UQSHL */
9459             {
9460                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9461                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
9462                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
9463                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
9464                 };
9465                 genenvfn = fns[size][u];
9466                 break;
9467             }
9468             case 0xa: /* SRSHL, URSHL */
9469             {
9470                 static NeonGenTwoOpFn * const fns[3][2] = {
9471                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
9472                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
9473                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
9474                 };
9475                 genfn = fns[size][u];
9476                 break;
9477             }
9478             case 0xb: /* SQRSHL, UQRSHL */
9479             {
9480                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
9481                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
9482                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
9483                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
9484                 };
9485                 genenvfn = fns[size][u];
9486                 break;
9487             }
9488             case 0xc: /* SMAX, UMAX */
9489             {
9490                 static NeonGenTwoOpFn * const fns[3][2] = {
9491                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
9492                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
9493                     { gen_max_s32, gen_max_u32 },
9494                 };
9495                 genfn = fns[size][u];
9496                 break;
9497             }
9498
9499             case 0xd: /* SMIN, UMIN */
9500             {
9501                 static NeonGenTwoOpFn * const fns[3][2] = {
9502                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
9503                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
9504                     { gen_min_s32, gen_min_u32 },
9505                 };
9506                 genfn = fns[size][u];
9507                 break;
9508             }
9509             case 0xe: /* SABD, UABD */
9510             case 0xf: /* SABA, UABA */
9511             {
9512                 static NeonGenTwoOpFn * const fns[3][2] = {
9513                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
9514                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
9515                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
9516                 };
9517                 genfn = fns[size][u];
9518                 break;
9519             }
9520             case 0x10: /* ADD, SUB */
9521             {
9522                 static NeonGenTwoOpFn * const fns[3][2] = {
9523                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9524                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9525                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9526                 };
9527                 genfn = fns[size][u];
9528                 break;
9529             }
9530             case 0x11: /* CMTST, CMEQ */
9531             {
9532                 static NeonGenTwoOpFn * const fns[3][2] = {
9533                     { gen_helper_neon_tst_u8, gen_helper_neon_ceq_u8 },
9534                     { gen_helper_neon_tst_u16, gen_helper_neon_ceq_u16 },
9535                     { gen_helper_neon_tst_u32, gen_helper_neon_ceq_u32 },
9536                 };
9537                 genfn = fns[size][u];
9538                 break;
9539             }
9540             case 0x13: /* MUL, PMUL */
9541                 if (u) {
9542                     /* PMUL */
9543                     assert(size == 0);
9544                     genfn = gen_helper_neon_mul_p8;
9545                     break;
9546                 }
9547                 /* fall through : MUL */
9548             case 0x12: /* MLA, MLS */
9549             {
9550                 static NeonGenTwoOpFn * const fns[3] = {
9551                     gen_helper_neon_mul_u8,
9552                     gen_helper_neon_mul_u16,
9553                     tcg_gen_mul_i32,
9554                 };
9555                 genfn = fns[size];
9556                 break;
9557             }
9558             case 0x16: /* SQDMULH, SQRDMULH */
9559             {
9560                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
9561                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
9562                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
9563                 };
9564                 assert(size == 1 || size == 2);
9565                 genenvfn = fns[size - 1][u];
9566                 break;
9567             }
9568             default:
9569                 g_assert_not_reached();
9570             }
9571
9572             if (genenvfn) {
9573                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
9574             } else {
9575                 genfn(tcg_res, tcg_op1, tcg_op2);
9576             }
9577
9578             if (opcode == 0xf || opcode == 0x12) {
9579                 /* SABA, UABA, MLA, MLS: accumulating ops */
9580                 static NeonGenTwoOpFn * const fns[3][2] = {
9581                     { gen_helper_neon_add_u8, gen_helper_neon_sub_u8 },
9582                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
9583                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
9584                 };
9585                 bool is_sub = (opcode == 0x12 && u); /* MLS */
9586
9587                 genfn = fns[size][is_sub];
9588                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
9589                 genfn(tcg_res, tcg_op1, tcg_res);
9590             }
9591
9592             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
9593
9594             tcg_temp_free_i32(tcg_res);
9595             tcg_temp_free_i32(tcg_op1);
9596             tcg_temp_free_i32(tcg_op2);
9597         }
9598     }
9599
9600     if (!is_q) {
9601         clear_vec_high(s, rd);
9602     }
9603 }
9604
9605 /* C3.6.16 AdvSIMD three same
9606  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
9607  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9608  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
9609  * +---+---+---+-----------+------+---+------+--------+---+------+------+
9610  */
9611 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
9612 {
9613     int opcode = extract32(insn, 11, 5);
9614
9615     switch (opcode) {
9616     case 0x3: /* logic ops */
9617         disas_simd_3same_logic(s, insn);
9618         break;
9619     case 0x17: /* ADDP */
9620     case 0x14: /* SMAXP, UMAXP */
9621     case 0x15: /* SMINP, UMINP */
9622     {
9623         /* Pairwise operations */
9624         int is_q = extract32(insn, 30, 1);
9625         int u = extract32(insn, 29, 1);
9626         int size = extract32(insn, 22, 2);
9627         int rm = extract32(insn, 16, 5);
9628         int rn = extract32(insn, 5, 5);
9629         int rd = extract32(insn, 0, 5);
9630         if (opcode == 0x17) {
9631             if (u || (size == 3 && !is_q)) {
9632                 unallocated_encoding(s);
9633                 return;
9634             }
9635         } else {
9636             if (size == 3) {
9637                 unallocated_encoding(s);
9638                 return;
9639             }
9640         }
9641         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
9642         break;
9643     }
9644     case 0x18 ... 0x31:
9645         /* floating point ops, sz[1] and U are part of opcode */
9646         disas_simd_3same_float(s, insn);
9647         break;
9648     default:
9649         disas_simd_3same_int(s, insn);
9650         break;
9651     }
9652 }
9653
9654 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
9655                                   int size, int rn, int rd)
9656 {
9657     /* Handle 2-reg-misc ops which are widening (so each size element
9658      * in the source becomes a 2*size element in the destination.
9659      * The only instruction like this is FCVTL.
9660      */
9661     int pass;
9662
9663     if (size == 3) {
9664         /* 32 -> 64 bit fp conversion */
9665         TCGv_i64 tcg_res[2];
9666         int srcelt = is_q ? 2 : 0;
9667
9668         for (pass = 0; pass < 2; pass++) {
9669             TCGv_i32 tcg_op = tcg_temp_new_i32();
9670             tcg_res[pass] = tcg_temp_new_i64();
9671
9672             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
9673             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
9674             tcg_temp_free_i32(tcg_op);
9675         }
9676         for (pass = 0; pass < 2; pass++) {
9677             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9678             tcg_temp_free_i64(tcg_res[pass]);
9679         }
9680     } else {
9681         /* 16 -> 32 bit fp conversion */
9682         int srcelt = is_q ? 4 : 0;
9683         TCGv_i32 tcg_res[4];
9684
9685         for (pass = 0; pass < 4; pass++) {
9686             tcg_res[pass] = tcg_temp_new_i32();
9687
9688             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
9689             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
9690                                            cpu_env);
9691         }
9692         for (pass = 0; pass < 4; pass++) {
9693             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
9694             tcg_temp_free_i32(tcg_res[pass]);
9695         }
9696     }
9697 }
9698
9699 static void handle_rev(DisasContext *s, int opcode, bool u,
9700                        bool is_q, int size, int rn, int rd)
9701 {
9702     int op = (opcode << 1) | u;
9703     int opsz = op + size;
9704     int grp_size = 3 - opsz;
9705     int dsize = is_q ? 128 : 64;
9706     int i;
9707
9708     if (opsz >= 3) {
9709         unallocated_encoding(s);
9710         return;
9711     }
9712
9713     if (!fp_access_check(s)) {
9714         return;
9715     }
9716
9717     if (size == 0) {
9718         /* Special case bytes, use bswap op on each group of elements */
9719         int groups = dsize / (8 << grp_size);
9720
9721         for (i = 0; i < groups; i++) {
9722             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
9723
9724             read_vec_element(s, tcg_tmp, rn, i, grp_size);
9725             switch (grp_size) {
9726             case MO_16:
9727                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
9728                 break;
9729             case MO_32:
9730                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
9731                 break;
9732             case MO_64:
9733                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
9734                 break;
9735             default:
9736                 g_assert_not_reached();
9737             }
9738             write_vec_element(s, tcg_tmp, rd, i, grp_size);
9739             tcg_temp_free_i64(tcg_tmp);
9740         }
9741         if (!is_q) {
9742             clear_vec_high(s, rd);
9743         }
9744     } else {
9745         int revmask = (1 << grp_size) - 1;
9746         int esize = 8 << size;
9747         int elements = dsize / esize;
9748         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9749         TCGv_i64 tcg_rd = tcg_const_i64(0);
9750         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
9751
9752         for (i = 0; i < elements; i++) {
9753             int e_rev = (i & 0xf) ^ revmask;
9754             int off = e_rev * esize;
9755             read_vec_element(s, tcg_rn, rn, i, size);
9756             if (off >= 64) {
9757                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
9758                                     tcg_rn, off - 64, esize);
9759             } else {
9760                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
9761             }
9762         }
9763         write_vec_element(s, tcg_rd, rd, 0, MO_64);
9764         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
9765
9766         tcg_temp_free_i64(tcg_rd_hi);
9767         tcg_temp_free_i64(tcg_rd);
9768         tcg_temp_free_i64(tcg_rn);
9769     }
9770 }
9771
9772 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
9773                                   bool is_q, int size, int rn, int rd)
9774 {
9775     /* Implement the pairwise operations from 2-misc:
9776      * SADDLP, UADDLP, SADALP, UADALP.
9777      * These all add pairs of elements in the input to produce a
9778      * double-width result element in the output (possibly accumulating).
9779      */
9780     bool accum = (opcode == 0x6);
9781     int maxpass = is_q ? 2 : 1;
9782     int pass;
9783     TCGv_i64 tcg_res[2];
9784
9785     if (size == 2) {
9786         /* 32 + 32 -> 64 op */
9787         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
9788
9789         for (pass = 0; pass < maxpass; pass++) {
9790             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9791             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9792
9793             tcg_res[pass] = tcg_temp_new_i64();
9794
9795             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
9796             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
9797             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9798             if (accum) {
9799                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
9800                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
9801             }
9802
9803             tcg_temp_free_i64(tcg_op1);
9804             tcg_temp_free_i64(tcg_op2);
9805         }
9806     } else {
9807         for (pass = 0; pass < maxpass; pass++) {
9808             TCGv_i64 tcg_op = tcg_temp_new_i64();
9809             NeonGenOneOpFn *genfn;
9810             static NeonGenOneOpFn * const fns[2][2] = {
9811                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
9812                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
9813             };
9814
9815             genfn = fns[size][u];
9816
9817             tcg_res[pass] = tcg_temp_new_i64();
9818
9819             read_vec_element(s, tcg_op, rn, pass, MO_64);
9820             genfn(tcg_res[pass], tcg_op);
9821
9822             if (accum) {
9823                 read_vec_element(s, tcg_op, rd, pass, MO_64);
9824                 if (size == 0) {
9825                     gen_helper_neon_addl_u16(tcg_res[pass],
9826                                              tcg_res[pass], tcg_op);
9827                 } else {
9828                     gen_helper_neon_addl_u32(tcg_res[pass],
9829                                              tcg_res[pass], tcg_op);
9830                 }
9831             }
9832             tcg_temp_free_i64(tcg_op);
9833         }
9834     }
9835     if (!is_q) {
9836         tcg_res[1] = tcg_const_i64(0);
9837     }
9838     for (pass = 0; pass < 2; pass++) {
9839         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9840         tcg_temp_free_i64(tcg_res[pass]);
9841     }
9842 }
9843
9844 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
9845 {
9846     /* Implement SHLL and SHLL2 */
9847     int pass;
9848     int part = is_q ? 2 : 0;
9849     TCGv_i64 tcg_res[2];
9850
9851     for (pass = 0; pass < 2; pass++) {
9852         static NeonGenWidenFn * const widenfns[3] = {
9853             gen_helper_neon_widen_u8,
9854             gen_helper_neon_widen_u16,
9855             tcg_gen_extu_i32_i64,
9856         };
9857         NeonGenWidenFn *widenfn = widenfns[size];
9858         TCGv_i32 tcg_op = tcg_temp_new_i32();
9859
9860         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
9861         tcg_res[pass] = tcg_temp_new_i64();
9862         widenfn(tcg_res[pass], tcg_op);
9863         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
9864
9865         tcg_temp_free_i32(tcg_op);
9866     }
9867
9868     for (pass = 0; pass < 2; pass++) {
9869         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9870         tcg_temp_free_i64(tcg_res[pass]);
9871     }
9872 }
9873
/* C3.6.17 AdvSIMD two reg misc
 *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +---+---+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Decode and translate one instruction from this group.
 *
 * The first switch validates the (opcode, U, size, Q) combination.
 * Operations that have a dedicated handler (REV*, narrowing, widening,
 * pairwise add, SHLL, SUQADD/USQADD, int->fp conversion, FP compare
 * against zero, reciprocal estimates) are dispatched from inside the
 * switch and return immediately.  Every case that breaks out of the
 * switch is translated by the generic per-element loops at the bottom:
 * 64-bit elements via handle_2misc_64(), 32-bit elements inline, and
 * 8/16-bit elements via Neon helpers that are fed 32 bits at a time.
 *
 * For the floating point opcodes the local 'opcode' is extended to seven
 * bits (size[1] goes into bit 5, U into bit 6) and 'size' is rewritten to
 * 2 or 3 according to size[0], so the code after the switch sees a
 * uniform element size.
 */
static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 12, 5);
    bool u = extract32(insn, 29, 1);
    bool is_q = extract32(insn, 30, 1);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    /* Set by the decode switch for ops whose generic-loop code needs an
     * fp status pointer and/or a temporarily changed rounding mode.
     */
    bool need_fpstatus = false;
    bool need_rmode = false;
    int rmode = -1;
    TCGv_i32 tcg_rmode;
    TCGv_ptr tcg_fpstatus;

    switch (opcode) {
    case 0x0: /* REV64, REV32 */
    case 0x1: /* REV16 */
        /* handle_rev does its own encoding and fp access checks */
        handle_rev(s, opcode, u, is_q, size, rn, rd);
        return;
    case 0x5: /* CNT, NOT, RBIT */
        /* Here size is part of the opcode rather than an element size */
        if (u && size == 0) {
            /* NOT: adjust size so we can use the 64-bits-at-a-time loop. */
            size = 3;
            break;
        } else if (u && size == 1) {
            /* RBIT */
            break;
        } else if (!u && size == 0) {
            /* CNT */
            break;
        }
        unallocated_encoding(s);
        return;
    case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
    case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }

        handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
        return;
    case 0x4: /* CLS, CLZ */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x2: /* SADDLP, UADDLP */
    case 0x6: /* SADALP, UADALP */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
        return;
    case 0x13: /* SHLL, SHLL2 */
        if (u == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_shll(s, is_q, size, rn, rd);
        return;
    case 0xa: /* CMLT */
        if (u == 1) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        /* 64-bit elements are only accepted in the 128-bit (Q) form */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x3: /* SUQADD, USQADD */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
        return;
    case 0x7: /* SQABS, SQNEG */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
    {
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         */
        int is_double = extract32(size, 0, 1);
        /* From here on opcode is the 7-bit value U:size[1]:opcode */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = is_double ? 3 : 2;
        switch (opcode) {
        case 0x2f: /* FABS */
        case 0x6f: /* FNEG */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x1d: /* SCVTF */
        case 0x5d: /* UCVTF */
        {
            bool is_signed = (opcode == 0x1d) ? true : false;
            /* Two doubles, or two/four singles depending on Q */
            int elements = is_double ? 2 : is_q ? 4 : 2;
            if (is_double && !is_q) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
            return;
        }
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            /* NOTE(review): no fp_access_check here, unlike the other
             * dispatching cases - presumably the handler performs it;
             * confirm against handle_2misc_fcmp_zero.
             */
            handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
            return;
        case 0x7f: /* FSQRT */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            need_fpstatus = true;
            need_rmode = true;
            /* Rounding mode number: extended-opcode bit 5 supplies the low
             * bit and bit 0 the high bit; it is converted by
             * arm_rmode_to_sf() after the switch.
             */
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x5c: /* FCVTAU */
        case 0x1c: /* FCVTAS */
            need_fpstatus = true;
            need_rmode = true;
            rmode = FPROUNDING_TIEAWAY;
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x3c: /* URECPE */
            /* Integer estimate: no 64-bit element form at all */
            if (size == 3) {
                unallocated_encoding(s);
                return;
            }
            /* fall through */
        case 0x3d: /* FRECPE */
        case 0x7d: /* FRSQRTE */
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
            return;
        case 0x56: /* FCVTXN, FCVTXN2 */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            /* fall through */
        case 0x16: /* FCVTN, FCVTN2 */
            /* handle_2misc_narrow does a 2*size -> size operation, but these
             * instructions encode the source size rather than dest size.
             */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
            return;
        case 0x17: /* FCVTL, FCVTL2 */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_widening(s, opcode, is_q, size, rn, rd);
            return;
        case 0x18: /* FRINTN */
        case 0x19: /* FRINTM */
        case 0x38: /* FRINTP */
        case 0x39: /* FRINTZ */
            /* Same rounding mode encoding as the FCVT* cases above */
            need_rmode = true;
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            /* fall through */
        case 0x59: /* FRINTX */
        case 0x79: /* FRINTI */
            /* FRINTX/FRINTI leave need_rmode clear, so no rounding mode
             * override is generated for them.
             */
            need_fpstatus = true;
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x58: /* FRINTA */
            need_rmode = true;
            rmode = FPROUNDING_TIEAWAY;
            need_fpstatus = true;
            if (size == 3 && !is_q) {
                unallocated_encoding(s);
                return;
            }
            break;
        case 0x7c: /* URSQRTE */
            if (size == 3) {
                unallocated_encoding(s);
                return;
            }
            need_fpstatus = true;
            break;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    }
    default:
        unallocated_encoding(s);
        return;
    }

    /* Everything that reaches this point uses the generic loops below */
    if (!fp_access_check(s)) {
        return;
    }

    if (need_fpstatus) {
        tcg_fpstatus = get_fpstatus_ptr();
    } else {
        TCGV_UNUSED_PTR(tcg_fpstatus);
    }
    if (need_rmode) {
        /* NOTE(review): the helper both reads and writes tcg_rmode, and is
         * called again with the same temp after the loops - presumably it
         * hands back the previous mode so that the second call restores
         * it; confirm against the set_rmode helper.
         */
        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
    } else {
        TCGV_UNUSED_I32(tcg_rmode);
    }

    if (size == 3) {
        /* All 64-bit element operations can be shared with scalar 2misc */
        int pass;

        for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
            TCGv_i64 tcg_op = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op, rn, pass, MO_64);

            handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
                            tcg_rmode, tcg_fpstatus);

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op);
        }
    } else {
        int pass;

        /* Work through the vector 32 bits at a time, whatever the
         * element size: two passes for 64-bit vectors, four for 128-bit.
         */
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();
            TCGCond cond;

            read_vec_element_i32(s, tcg_op, rn, pass, MO_32);

            if (size == 2) {
                /* Special cases for 32 bit elements */
                switch (opcode) {
                case 0xa: /* CMLT */
                    /* 32 bit integer comparison against zero, result is
                     * test ? (2^32 - 1) : 0. We implement via setcond(test)
                     * and inverting.
                     */
                    cond = TCG_COND_LT;
                do_cmop:
                    /* Shared tail for all compare-with-zero ops; 'cond'
                     * is set by whichever case jumped (or fell) here.
                     */
                    tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
                    tcg_gen_neg_i32(tcg_res, tcg_res);
                    break;
                case 0x8: /* CMGT, CMGE */
                    cond = u ? TCG_COND_GE : TCG_COND_GT;
                    goto do_cmop;
                case 0x9: /* CMEQ, CMLE */
                    cond = u ? TCG_COND_LE : TCG_COND_EQ;
                    goto do_cmop;
                case 0x4: /* CLS, CLZ */
                    if (u) {
                        gen_helper_clz32(tcg_res, tcg_op);
                    } else {
                        gen_helper_cls32(tcg_res, tcg_op);
                    }
                    break;
                case 0x7: /* SQABS, SQNEG */
                    if (u) {
                        gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
                    } else {
                        gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
                    }
                    break;
                case 0xb: /* ABS, NEG */
                    if (u) {
                        tcg_gen_neg_i32(tcg_res, tcg_op);
                    } else {
                        /* ABS: negate, then keep the original value
                         * instead when it was greater than zero.
                         */
                        TCGv_i32 tcg_zero = tcg_const_i32(0);
                        tcg_gen_neg_i32(tcg_res, tcg_op);
                        tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
                                            tcg_zero, tcg_op, tcg_res);
                        tcg_temp_free_i32(tcg_zero);
                    }
                    break;
                case 0x2f: /* FABS */
                    gen_helper_vfp_abss(tcg_res, tcg_op);
                    break;
                case 0x6f: /* FNEG */
                    gen_helper_vfp_negs(tcg_res, tcg_op);
                    break;
                case 0x7f: /* FSQRT */
                    gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
                    break;
                case 0x1a: /* FCVTNS */
                case 0x1b: /* FCVTMS */
                case 0x1c: /* FCVTAS */
                case 0x3a: /* FCVTPS */
                case 0x3b: /* FCVTZS */
                {
                    /* The variants share one helper call, passing a
                     * zero shift; the rounding mode was set up above.
                     */
                    TCGv_i32 tcg_shift = tcg_const_i32(0);
                    gen_helper_vfp_tosls(tcg_res, tcg_op,
                                         tcg_shift, tcg_fpstatus);
                    tcg_temp_free_i32(tcg_shift);
                    break;
                }
                case 0x5a: /* FCVTNU */
                case 0x5b: /* FCVTMU */
                case 0x5c: /* FCVTAU */
                case 0x7a: /* FCVTPU */
                case 0x7b: /* FCVTZU */
                {
                    TCGv_i32 tcg_shift = tcg_const_i32(0);
                    gen_helper_vfp_touls(tcg_res, tcg_op,
                                         tcg_shift, tcg_fpstatus);
                    tcg_temp_free_i32(tcg_shift);
                    break;
                }
                case 0x18: /* FRINTN */
                case 0x19: /* FRINTM */
                case 0x38: /* FRINTP */
                case 0x39: /* FRINTZ */
                case 0x58: /* FRINTA */
                case 0x79: /* FRINTI */
                    gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
                    break;
                case 0x59: /* FRINTX */
                    gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
                    break;
                case 0x7c: /* URSQRTE */
                    gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
                    break;
                default:
                    g_assert_not_reached();
                }
            } else {
                /* Use helpers for 8 and 16 bit elements */
                switch (opcode) {
                case 0x5: /* CNT, RBIT */
                    /* For these two insns size is part of the opcode specifier
                     * (handled earlier); they always operate on byte elements.
                     */
                    if (u) {
                        gen_helper_neon_rbit_u8(tcg_res, tcg_op);
                    } else {
                        gen_helper_neon_cnt_u8(tcg_res, tcg_op);
                    }
                    break;
                case 0x7: /* SQABS, SQNEG */
                {
                    NeonGenOneOpEnvFn *genfn;
                    /* Indexed [size][u] */
                    static NeonGenOneOpEnvFn * const fns[2][2] = {
                        { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                        { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                    };
                    genfn = fns[size][u];
                    genfn(tcg_res, cpu_env, tcg_op);
                    break;
                }
                case 0x8: /* CMGT, CMGE */
                case 0x9: /* CMEQ, CMLE */
                case 0xa: /* CMLT */
                {
                    /* Indexed [comparison][size] */
                    static NeonGenTwoOpFn * const fns[3][2] = {
                        { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
                        { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
                        { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
                    };
                    NeonGenTwoOpFn *genfn;
                    int comp;
                    bool reverse;
                    TCGv_i32 tcg_zero = tcg_const_i32(0);

                    /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
                    comp = (opcode - 0x8) * 2 + u;
                    /* ...but LE, LT are implemented as reverse GE, GT */
                    reverse = (comp > 2);
                    if (reverse) {
                        comp = 4 - comp;
                    }
                    genfn = fns[comp][size];
                    if (reverse) {
                        genfn(tcg_res, tcg_zero, tcg_op);
                    } else {
                        genfn(tcg_res, tcg_op, tcg_zero);
                    }
                    tcg_temp_free_i32(tcg_zero);
                    break;
                }
                case 0xb: /* ABS, NEG */
                    if (u) {
                        /* NEG is computed as 0 - x using the sub helpers */
                        TCGv_i32 tcg_zero = tcg_const_i32(0);
                        if (size) {
                            gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
                        } else {
                            gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
                        }
                        tcg_temp_free_i32(tcg_zero);
                    } else {
                        if (size) {
                            gen_helper_neon_abs_s16(tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_abs_s8(tcg_res, tcg_op);
                        }
                    }
                    break;
                case 0x4: /* CLS, CLZ */
                    if (u) {
                        if (size == 0) {
                            gen_helper_neon_clz_u8(tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_clz_u16(tcg_res, tcg_op);
                        }
                    } else {
                        if (size == 0) {
                            gen_helper_neon_cls_s8(tcg_res, tcg_op);
                        } else {
                            gen_helper_neon_cls_s16(tcg_res, tcg_op);
                        }
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op);
        }
    }
    /* The loops above only wrote the low 64 bits for the non-Q form */
    if (!is_q) {
        clear_vec_high(s, rd);
    }

    if (need_rmode) {
        /* Second set_rmode call paired with the one before the loops */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, cpu_env);
        tcg_temp_free_i32(tcg_rmode);
    }
    if (need_fpstatus) {
        tcg_temp_free_ptr(tcg_fpstatus);
    }
}
10385
10386 /* C3.6.13 AdvSIMD scalar x indexed element
10387  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10388  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10389  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10390  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
10391  * C3.6.18 AdvSIMD vector x indexed element
10392  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
10393  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10394  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
10395  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
10396  */
10397 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
10398 {
10399     /* This encoding has two kinds of instruction:
10400      *  normal, where we perform elt x idxelt => elt for each
10401      *     element in the vector
10402      *  long, where we perform elt x idxelt and generate a result of
10403      *     double the width of the input element
10404      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
10405      */
10406     bool is_scalar = extract32(insn, 28, 1);
10407     bool is_q = extract32(insn, 30, 1);
10408     bool u = extract32(insn, 29, 1);
10409     int size = extract32(insn, 22, 2);
10410     int l = extract32(insn, 21, 1);
10411     int m = extract32(insn, 20, 1);
10412     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
10413     int rm = extract32(insn, 16, 4);
10414     int opcode = extract32(insn, 12, 4);
10415     int h = extract32(insn, 11, 1);
10416     int rn = extract32(insn, 5, 5);
10417     int rd = extract32(insn, 0, 5);
10418     bool is_long = false;
10419     bool is_fp = false;
10420     int index;
10421     TCGv_ptr fpst;
10422
10423     switch (opcode) {
10424     case 0x0: /* MLA */
10425     case 0x4: /* MLS */
10426         if (!u || is_scalar) {
10427             unallocated_encoding(s);
10428             return;
10429         }
10430         break;
10431     case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10432     case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10433     case 0xa: /* SMULL, SMULL2, UMULL, UMULL2 */
10434         if (is_scalar) {
10435             unallocated_encoding(s);
10436             return;
10437         }
10438         is_long = true;
10439         break;
10440     case 0x3: /* SQDMLAL, SQDMLAL2 */
10441     case 0x7: /* SQDMLSL, SQDMLSL2 */
10442     case 0xb: /* SQDMULL, SQDMULL2 */
10443         is_long = true;
10444         /* fall through */
10445     case 0xc: /* SQDMULH */
10446     case 0xd: /* SQRDMULH */
10447         if (u) {
10448             unallocated_encoding(s);
10449             return;
10450         }
10451         break;
10452     case 0x8: /* MUL */
10453         if (u || is_scalar) {
10454             unallocated_encoding(s);
10455             return;
10456         }
10457         break;
10458     case 0x1: /* FMLA */
10459     case 0x5: /* FMLS */
10460         if (u) {
10461             unallocated_encoding(s);
10462             return;
10463         }
10464         /* fall through */
10465     case 0x9: /* FMUL, FMULX */
10466         if (!extract32(size, 1, 1)) {
10467             unallocated_encoding(s);
10468             return;
10469         }
10470         is_fp = true;
10471         break;
10472     default:
10473         unallocated_encoding(s);
10474         return;
10475     }
10476
10477     if (is_fp) {
10478         /* low bit of size indicates single/double */
10479         size = extract32(size, 0, 1) ? 3 : 2;
10480         if (size == 2) {
10481             index = h << 1 | l;
10482         } else {
10483             if (l || !is_q) {
10484                 unallocated_encoding(s);
10485                 return;
10486             }
10487             index = h;
10488         }
10489         rm |= (m << 4);
10490     } else {
10491         switch (size) {
10492         case 1:
10493             index = h << 2 | l << 1 | m;
10494             break;
10495         case 2:
10496             index = h << 1 | l;
10497             rm |= (m << 4);
10498             break;
10499         default:
10500             unallocated_encoding(s);
10501             return;
10502         }
10503     }
10504
10505     if (!fp_access_check(s)) {
10506         return;
10507     }
10508
10509     if (is_fp) {
10510         fpst = get_fpstatus_ptr();
10511     } else {
10512         TCGV_UNUSED_PTR(fpst);
10513     }
10514
10515     if (size == 3) {
10516         TCGv_i64 tcg_idx = tcg_temp_new_i64();
10517         int pass;
10518
10519         assert(is_fp && is_q && !is_long);
10520
10521         read_vec_element(s, tcg_idx, rm, index, MO_64);
10522
10523         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10524             TCGv_i64 tcg_op = tcg_temp_new_i64();
10525             TCGv_i64 tcg_res = tcg_temp_new_i64();
10526
10527             read_vec_element(s, tcg_op, rn, pass, MO_64);
10528
10529             switch (opcode) {
10530             case 0x5: /* FMLS */
10531                 /* As usual for ARM, separate negation for fused multiply-add */
10532                 gen_helper_vfp_negd(tcg_op, tcg_op);
10533                 /* fall through */
10534             case 0x1: /* FMLA */
10535                 read_vec_element(s, tcg_res, rd, pass, MO_64);
10536                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10537                 break;
10538             case 0x9: /* FMUL, FMULX */
10539                 if (u) {
10540                     gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
10541                 } else {
10542                     gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
10543                 }
10544                 break;
10545             default:
10546                 g_assert_not_reached();
10547             }
10548
10549             write_vec_element(s, tcg_res, rd, pass, MO_64);
10550             tcg_temp_free_i64(tcg_op);
10551             tcg_temp_free_i64(tcg_res);
10552         }
10553
10554         if (is_scalar) {
10555             clear_vec_high(s, rd);
10556         }
10557
10558         tcg_temp_free_i64(tcg_idx);
10559     } else if (!is_long) {
10560         /* 32 bit floating point, or 16 or 32 bit integer.
10561          * For the 16 bit scalar case we use the usual Neon helpers and
10562          * rely on the fact that 0 op 0 == 0 with no side effects.
10563          */
10564         TCGv_i32 tcg_idx = tcg_temp_new_i32();
10565         int pass, maxpasses;
10566
10567         if (is_scalar) {
10568             maxpasses = 1;
10569         } else {
10570             maxpasses = is_q ? 4 : 2;
10571         }
10572
10573         read_vec_element_i32(s, tcg_idx, rm, index, size);
10574
10575         if (size == 1 && !is_scalar) {
10576             /* The simplest way to handle the 16x16 indexed ops is to duplicate
10577              * the index into both halves of the 32 bit tcg_idx and then use
10578              * the usual Neon helpers.
10579              */
10580             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10581         }
10582
10583         for (pass = 0; pass < maxpasses; pass++) {
10584             TCGv_i32 tcg_op = tcg_temp_new_i32();
10585             TCGv_i32 tcg_res = tcg_temp_new_i32();
10586
10587             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
10588
10589             switch (opcode) {
10590             case 0x0: /* MLA */
10591             case 0x4: /* MLS */
10592             case 0x8: /* MUL */
10593             {
10594                 static NeonGenTwoOpFn * const fns[2][2] = {
10595                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
10596                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
10597                 };
10598                 NeonGenTwoOpFn *genfn;
10599                 bool is_sub = opcode == 0x4;
10600
10601                 if (size == 1) {
10602                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
10603                 } else {
10604                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
10605                 }
10606                 if (opcode == 0x8) {
10607                     break;
10608                 }
10609                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
10610                 genfn = fns[size - 1][is_sub];
10611                 genfn(tcg_res, tcg_op, tcg_res);
10612                 break;
10613             }
10614             case 0x5: /* FMLS */
10615                 /* As usual for ARM, separate negation for fused multiply-add */
10616                 gen_helper_vfp_negs(tcg_op, tcg_op);
10617                 /* fall through */
10618             case 0x1: /* FMLA */
10619                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10620                 gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
10621                 break;
10622             case 0x9: /* FMUL, FMULX */
10623                 if (u) {
10624                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
10625                 } else {
10626                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
10627                 }
10628                 break;
10629             case 0xc: /* SQDMULH */
10630                 if (size == 1) {
10631                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
10632                                                tcg_op, tcg_idx);
10633                 } else {
10634                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
10635                                                tcg_op, tcg_idx);
10636                 }
10637                 break;
10638             case 0xd: /* SQRDMULH */
10639                 if (size == 1) {
10640                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
10641                                                 tcg_op, tcg_idx);
10642                 } else {
10643                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
10644                                                 tcg_op, tcg_idx);
10645                 }
10646                 break;
10647             default:
10648                 g_assert_not_reached();
10649             }
10650
10651             if (is_scalar) {
10652                 write_fp_sreg(s, rd, tcg_res);
10653             } else {
10654                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10655             }
10656
10657             tcg_temp_free_i32(tcg_op);
10658             tcg_temp_free_i32(tcg_res);
10659         }
10660
10661         tcg_temp_free_i32(tcg_idx);
10662
10663         if (!is_q) {
10664             clear_vec_high(s, rd);
10665         }
10666     } else {
10667         /* long ops: 16x16->32 or 32x32->64 */
10668         TCGv_i64 tcg_res[2];
10669         int pass;
10670         bool satop = extract32(opcode, 0, 1);
10671         TCGMemOp memop = MO_32;
10672
10673         if (satop || !u) {
10674             memop |= MO_SIGN;
10675         }
10676
10677         if (size == 2) {
10678             TCGv_i64 tcg_idx = tcg_temp_new_i64();
10679
10680             read_vec_element(s, tcg_idx, rm, index, memop);
10681
10682             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10683                 TCGv_i64 tcg_op = tcg_temp_new_i64();
10684                 TCGv_i64 tcg_passres;
10685                 int passelt;
10686
10687                 if (is_scalar) {
10688                     passelt = 0;
10689                 } else {
10690                     passelt = pass + (is_q * 2);
10691                 }
10692
10693                 read_vec_element(s, tcg_op, rn, passelt, memop);
10694
10695                 tcg_res[pass] = tcg_temp_new_i64();
10696
10697                 if (opcode == 0xa || opcode == 0xb) {
10698                     /* Non-accumulating ops */
10699                     tcg_passres = tcg_res[pass];
10700                 } else {
10701                     tcg_passres = tcg_temp_new_i64();
10702                 }
10703
10704                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
10705                 tcg_temp_free_i64(tcg_op);
10706
10707                 if (satop) {
10708                     /* saturating, doubling */
10709                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
10710                                                       tcg_passres, tcg_passres);
10711                 }
10712
10713                 if (opcode == 0xa || opcode == 0xb) {
10714                     continue;
10715                 }
10716
10717                 /* Accumulating op: handle accumulate step */
10718                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10719
10720                 switch (opcode) {
10721                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10722                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10723                     break;
10724                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10725                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10726                     break;
10727                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10728                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
10729                     /* fall through */
10730                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10731                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10732                                                       tcg_res[pass],
10733                                                       tcg_passres);
10734                     break;
10735                 default:
10736                     g_assert_not_reached();
10737                 }
10738                 tcg_temp_free_i64(tcg_passres);
10739             }
10740             tcg_temp_free_i64(tcg_idx);
10741
10742             if (is_scalar) {
10743                 clear_vec_high(s, rd);
10744             }
10745         } else {
10746             TCGv_i32 tcg_idx = tcg_temp_new_i32();
10747
10748             assert(size == 1);
10749             read_vec_element_i32(s, tcg_idx, rm, index, size);
10750
10751             if (!is_scalar) {
10752                 /* The simplest way to handle the 16x16 indexed ops is to
10753                  * duplicate the index into both halves of the 32 bit tcg_idx
10754                  * and then use the usual Neon helpers.
10755                  */
10756                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
10757             }
10758
10759             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
10760                 TCGv_i32 tcg_op = tcg_temp_new_i32();
10761                 TCGv_i64 tcg_passres;
10762
10763                 if (is_scalar) {
10764                     read_vec_element_i32(s, tcg_op, rn, pass, size);
10765                 } else {
10766                     read_vec_element_i32(s, tcg_op, rn,
10767                                          pass + (is_q * 2), MO_32);
10768                 }
10769
10770                 tcg_res[pass] = tcg_temp_new_i64();
10771
10772                 if (opcode == 0xa || opcode == 0xb) {
10773                     /* Non-accumulating ops */
10774                     tcg_passres = tcg_res[pass];
10775                 } else {
10776                     tcg_passres = tcg_temp_new_i64();
10777                 }
10778
10779                 if (memop & MO_SIGN) {
10780                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
10781                 } else {
10782                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
10783                 }
10784                 if (satop) {
10785                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10786                                                       tcg_passres, tcg_passres);
10787                 }
10788                 tcg_temp_free_i32(tcg_op);
10789
10790                 if (opcode == 0xa || opcode == 0xb) {
10791                     continue;
10792                 }
10793
10794                 /* Accumulating op: handle accumulate step */
10795                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10796
10797                 switch (opcode) {
10798                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10799                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
10800                                              tcg_passres);
10801                     break;
10802                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10803                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
10804                                              tcg_passres);
10805                     break;
10806                 case 0x7: /* SQDMLSL, SQDMLSL2 */
10807                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10808                     /* fall through */
10809                 case 0x3: /* SQDMLAL, SQDMLAL2 */
10810                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10811                                                       tcg_res[pass],
10812                                                       tcg_passres);
10813                     break;
10814                 default:
10815                     g_assert_not_reached();
10816                 }
10817                 tcg_temp_free_i64(tcg_passres);
10818             }
10819             tcg_temp_free_i32(tcg_idx);
10820
10821             if (is_scalar) {
10822                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
10823             }
10824         }
10825
10826         if (is_scalar) {
10827             tcg_res[1] = tcg_const_i64(0);
10828         }
10829
10830         for (pass = 0; pass < 2; pass++) {
10831             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10832             tcg_temp_free_i64(tcg_res[pass]);
10833         }
10834     }
10835
10836     if (!TCGV_IS_UNUSED_PTR(fpst)) {
10837         tcg_temp_free_ptr(fpst);
10838     }
10839 }
10840
10841 /* C3.6.19 Crypto AES
10842  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10843  * +-----------------+------+-----------+--------+-----+------+------+
10844  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10845  * +-----------------+------+-----------+--------+-----+------+------+
10846  */
10847 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
10848 {
10849     int size = extract32(insn, 22, 2);
10850     int opcode = extract32(insn, 12, 5);
10851     int rn = extract32(insn, 5, 5);
10852     int rd = extract32(insn, 0, 5);
10853     int decrypt;
10854     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
10855     CryptoThreeOpEnvFn *genfn;
10856
10857     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
10858         || size != 0) {
10859         unallocated_encoding(s);
10860         return;
10861     }
10862
10863     switch (opcode) {
10864     case 0x4: /* AESE */
10865         decrypt = 0;
10866         genfn = gen_helper_crypto_aese;
10867         break;
10868     case 0x6: /* AESMC */
10869         decrypt = 0;
10870         genfn = gen_helper_crypto_aesmc;
10871         break;
10872     case 0x5: /* AESD */
10873         decrypt = 1;
10874         genfn = gen_helper_crypto_aese;
10875         break;
10876     case 0x7: /* AESIMC */
10877         decrypt = 1;
10878         genfn = gen_helper_crypto_aesmc;
10879         break;
10880     default:
10881         unallocated_encoding(s);
10882         return;
10883     }
10884
10885     /* Note that we convert the Vx register indexes into the
10886      * index within the vfp.regs[] array, so we can share the
10887      * helper with the AArch32 instructions.
10888      */
10889     tcg_rd_regno = tcg_const_i32(rd << 1);
10890     tcg_rn_regno = tcg_const_i32(rn << 1);
10891     tcg_decrypt = tcg_const_i32(decrypt);
10892
10893     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
10894
10895     tcg_temp_free_i32(tcg_rd_regno);
10896     tcg_temp_free_i32(tcg_rn_regno);
10897     tcg_temp_free_i32(tcg_decrypt);
10898 }
10899
10900 /* C3.6.20 Crypto three-reg SHA
10901  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
10902  * +-----------------+------+---+------+---+--------+-----+------+------+
10903  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
10904  * +-----------------+------+---+------+---+--------+-----+------+------+
10905  */
10906 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
10907 {
10908     int size = extract32(insn, 22, 2);
10909     int opcode = extract32(insn, 12, 3);
10910     int rm = extract32(insn, 16, 5);
10911     int rn = extract32(insn, 5, 5);
10912     int rd = extract32(insn, 0, 5);
10913     CryptoThreeOpEnvFn *genfn;
10914     TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
10915     int feature = ARM_FEATURE_V8_SHA256;
10916
10917     if (size != 0) {
10918         unallocated_encoding(s);
10919         return;
10920     }
10921
10922     switch (opcode) {
10923     case 0: /* SHA1C */
10924     case 1: /* SHA1P */
10925     case 2: /* SHA1M */
10926     case 3: /* SHA1SU0 */
10927         genfn = NULL;
10928         feature = ARM_FEATURE_V8_SHA1;
10929         break;
10930     case 4: /* SHA256H */
10931         genfn = gen_helper_crypto_sha256h;
10932         break;
10933     case 5: /* SHA256H2 */
10934         genfn = gen_helper_crypto_sha256h2;
10935         break;
10936     case 6: /* SHA256SU1 */
10937         genfn = gen_helper_crypto_sha256su1;
10938         break;
10939     default:
10940         unallocated_encoding(s);
10941         return;
10942     }
10943
10944     if (!arm_dc_feature(s, feature)) {
10945         unallocated_encoding(s);
10946         return;
10947     }
10948
10949     tcg_rd_regno = tcg_const_i32(rd << 1);
10950     tcg_rn_regno = tcg_const_i32(rn << 1);
10951     tcg_rm_regno = tcg_const_i32(rm << 1);
10952
10953     if (genfn) {
10954         genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
10955     } else {
10956         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
10957
10958         gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
10959                                     tcg_rn_regno, tcg_rm_regno, tcg_opcode);
10960         tcg_temp_free_i32(tcg_opcode);
10961     }
10962
10963     tcg_temp_free_i32(tcg_rd_regno);
10964     tcg_temp_free_i32(tcg_rn_regno);
10965     tcg_temp_free_i32(tcg_rm_regno);
10966 }
10967
10968 /* C3.6.21 Crypto two-reg SHA
10969  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
10970  * +-----------------+------+-----------+--------+-----+------+------+
10971  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
10972  * +-----------------+------+-----------+--------+-----+------+------+
10973  */
10974 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
10975 {
10976     int size = extract32(insn, 22, 2);
10977     int opcode = extract32(insn, 12, 5);
10978     int rn = extract32(insn, 5, 5);
10979     int rd = extract32(insn, 0, 5);
10980     CryptoTwoOpEnvFn *genfn;
10981     int feature;
10982     TCGv_i32 tcg_rd_regno, tcg_rn_regno;
10983
10984     if (size != 0) {
10985         unallocated_encoding(s);
10986         return;
10987     }
10988
10989     switch (opcode) {
10990     case 0: /* SHA1H */
10991         feature = ARM_FEATURE_V8_SHA1;
10992         genfn = gen_helper_crypto_sha1h;
10993         break;
10994     case 1: /* SHA1SU1 */
10995         feature = ARM_FEATURE_V8_SHA1;
10996         genfn = gen_helper_crypto_sha1su1;
10997         break;
10998     case 2: /* SHA256SU0 */
10999         feature = ARM_FEATURE_V8_SHA256;
11000         genfn = gen_helper_crypto_sha256su0;
11001         break;
11002     default:
11003         unallocated_encoding(s);
11004         return;
11005     }
11006
11007     if (!arm_dc_feature(s, feature)) {
11008         unallocated_encoding(s);
11009         return;
11010     }
11011
11012     tcg_rd_regno = tcg_const_i32(rd << 1);
11013     tcg_rn_regno = tcg_const_i32(rn << 1);
11014
11015     genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
11016
11017     tcg_temp_free_i32(tcg_rd_regno);
11018     tcg_temp_free_i32(tcg_rn_regno);
11019 }
11020
11021 /* C3.6 Data processing - SIMD, inc Crypto
11022  *
11023  * As the decode gets a little complex we are using a table based
11024  * approach for this part of the decode.
11025  */
11026 static const AArch64DecodeTable data_proc_simd[] = {
11027     /* pattern  ,  mask     ,  fn                        */
11028     { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
11029     { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
11030     { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
11031     { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
11032     { 0x0e000400, 0x9fe08400, disas_simd_copy },
11033     { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
11034     /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
11035     { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
11036     { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
11037     { 0x0e000000, 0xbf208c00, disas_simd_tb },
11038     { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
11039     { 0x2e000000, 0xbf208400, disas_simd_ext },
11040     { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
11041     { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
11042     { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
11043     { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
11044     { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
11045     { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
11046     { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
11047     { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
11048     { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
11049     { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
11050     { 0x00000000, 0x00000000, NULL }
11051 };
11052
11053 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
11054 {
11055     /* Note that this is called with all non-FP cases from
11056      * table C3-6 so it must UNDEF for entries not specifically
11057      * allocated to instructions in that table.
11058      */
11059     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
11060     if (fn) {
11061         fn(s, insn);
11062     } else {
11063         unallocated_encoding(s);
11064     }
11065 }
11066
11067 /* C3.6 Data processing - SIMD and floating point */
11068 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
11069 {
11070     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
11071         disas_data_proc_fp(s, insn);
11072     } else {
11073         /* SIMD, including crypto */
11074         disas_data_proc_simd(s, insn);
11075     }
11076 }
11077
11078 /* C3.1 A64 instruction index by encoding */
11079 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
11080 {
11081     uint32_t insn;
11082
11083     insn = arm_ldl_code(env, s->pc, s->sctlr_b);
11084     s->insn = insn;
11085     s->pc += 4;
11086
11087     s->fp_access_checked = false;
11088
11089     switch (extract32(insn, 25, 4)) {
11090     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
11091         unallocated_encoding(s);
11092         break;
11093     case 0x8: case 0x9: /* Data processing - immediate */
11094         disas_data_proc_imm(s, insn);
11095         break;
11096     case 0xa: case 0xb: /* Branch, exception generation and system insns */
11097         disas_b_exc_sys(s, insn);
11098         break;
11099     case 0x4:
11100     case 0x6:
11101     case 0xc:
11102     case 0xe:      /* Loads and stores */
11103         disas_ldst(s, insn);
11104         break;
11105     case 0x5:
11106     case 0xd:      /* Data processing - register */
11107         disas_data_proc_reg(s, insn);
11108         break;
11109     case 0x7:
11110     case 0xf:      /* Data processing - SIMD and floating point */
11111         disas_data_proc_simd_fp(s, insn);
11112         break;
11113     default:
11114         assert(FALSE); /* all 15 cases should be handled above */
11115         break;
11116     }
11117
11118     /* if we allocated any temporaries, free them here */
11119     free_tmp_a64(s);
11120 }
11121
11122 void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
11123 {
11124     CPUState *cs = CPU(cpu);
11125     CPUARMState *env = &cpu->env;
11126     DisasContext dc1, *dc = &dc1;
11127     target_ulong pc_start;
11128     target_ulong next_page_start;
11129     int num_insns;
11130     int max_insns;
11131
11132     pc_start = tb->pc;
11133
11134     dc->tb = tb;
11135
11136     dc->is_jmp = DISAS_NEXT;
11137     dc->pc = pc_start;
11138     dc->singlestep_enabled = cs->singlestep_enabled;
11139     dc->condjmp = 0;
11140
11141     dc->aarch64 = 1;
11142     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
11143      * there is no secure EL1, so we route exceptions to EL3.
11144      */
11145     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
11146                                !arm_el_is_aa64(env, 3);
11147     dc->thumb = 0;
11148     dc->sctlr_b = 0;
11149     dc->be_data = ARM_TBFLAG_BE_DATA(tb->flags) ? MO_BE : MO_LE;
11150     dc->condexec_mask = 0;
11151     dc->condexec_cond = 0;
11152     dc->mmu_idx = ARM_TBFLAG_MMUIDX(tb->flags);
11153     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
11154 #if !defined(CONFIG_USER_ONLY)
11155     dc->user = (dc->current_el == 0);
11156 #endif
11157     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(tb->flags);
11158     dc->vec_len = 0;
11159     dc->vec_stride = 0;
11160     dc->cp_regs = cpu->cp_regs;
11161     dc->features = env->features;
11162
11163     /* Single step state. The code-generation logic here is:
11164      *  SS_ACTIVE == 0:
11165      *   generate code with no special handling for single-stepping (except
11166      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
11167      *   this happens anyway because those changes are all system register or
11168      *   PSTATE writes).
11169      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
11170      *   emit code for one insn
11171      *   emit code to clear PSTATE.SS
11172      *   emit code to generate software step exception for completed step
11173      *   end TB (as usual for having generated an exception)
11174      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
11175      *   emit code to generate a software step exception
11176      *   end the TB
11177      */
11178     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(tb->flags);
11179     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(tb->flags);
11180     dc->is_ldex = false;
11181     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
11182
11183     init_tmp_a64_array(dc);
11184
11185     next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
11186     num_insns = 0;
11187     max_insns = tb->cflags & CF_COUNT_MASK;
11188     if (max_insns == 0) {
11189         max_insns = CF_COUNT_MASK;
11190     }
11191     if (max_insns > TCG_MAX_INSNS) {
11192         max_insns = TCG_MAX_INSNS;
11193     }
11194
11195     gen_tb_start(tb);
11196
11197     tcg_clear_temp_count();
11198
11199     do {
11200         dc->insn_start_idx = tcg_op_buf_count();
11201         tcg_gen_insn_start(dc->pc, 0, 0);
11202         num_insns++;
11203
11204         if (unlikely(!QTAILQ_EMPTY(&cs->breakpoints))) {
11205             CPUBreakpoint *bp;
11206             QTAILQ_FOREACH(bp, &cs->breakpoints, entry) {
11207                 if (bp->pc == dc->pc) {
11208                     if (bp->flags & BP_CPU) {
11209                         gen_a64_set_pc_im(dc->pc);
11210                         gen_helper_check_breakpoints(cpu_env);
11211                         /* End the TB early; it likely won't be executed */
11212                         dc->is_jmp = DISAS_UPDATE;
11213                     } else {
11214                         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
11215                         /* The address covered by the breakpoint must be
11216                            included in [tb->pc, tb->pc + tb->size) in order
11217                            to for it to be properly cleared -- thus we
11218                            increment the PC here so that the logic setting
11219                            tb->size below does the right thing.  */
11220                         dc->pc += 4;
11221                         goto done_generating;
11222                     }
11223                     break;
11224                 }
11225             }
11226         }
11227
11228         if (num_insns == max_insns && (tb->cflags & CF_LAST_IO)) {
11229             gen_io_start();
11230         }
11231
11232         if (dc->ss_active && !dc->pstate_ss) {
11233             /* Singlestep state is Active-pending.
11234              * If we're in this state at the start of a TB then either
11235              *  a) we just took an exception to an EL which is being debugged
11236              *     and this is the first insn in the exception handler
11237              *  b) debug exceptions were masked and we just unmasked them
11238              *     without changing EL (eg by clearing PSTATE.D)
11239              * In either case we're going to take a swstep exception in the
11240              * "did not step an insn" case, and so the syndrome ISV and EX
11241              * bits should be zero.
11242              */
11243             assert(num_insns == 1);
11244             gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
11245                           default_exception_el(dc));
11246             dc->is_jmp = DISAS_EXC;
11247             break;
11248         }
11249
11250         disas_a64_insn(env, dc);
11251
11252         if (tcg_check_temp_count()) {
11253             fprintf(stderr, "TCG temporary leak before "TARGET_FMT_lx"\n",
11254                     dc->pc);
11255         }
11256
11257         /* Translation stops when a conditional branch is encountered.
11258          * Otherwise the subsequent code could get translated several times.
11259          * Also stop translation when a page boundary is reached.  This
11260          * ensures prefetch aborts occur at the right place.
11261          */
11262     } while (!dc->is_jmp && !tcg_op_buf_full() &&
11263              !cs->singlestep_enabled &&
11264              !singlestep &&
11265              !dc->ss_active &&
11266              dc->pc < next_page_start &&
11267              num_insns < max_insns);
11268
11269     if (tb->cflags & CF_LAST_IO) {
11270         gen_io_end();
11271     }
11272
11273     if (unlikely(cs->singlestep_enabled || dc->ss_active)
11274         && dc->is_jmp != DISAS_EXC) {
11275         /* Note that this means single stepping WFI doesn't halt the CPU.
11276          * For conditional branch insns this is harmless unreachable code as
11277          * gen_goto_tb() has already handled emitting the debug exception
11278          * (and thus a tb-jump is not possible when singlestepping).
11279          */
11280         assert(dc->is_jmp != DISAS_TB_JUMP);
11281         if (dc->is_jmp != DISAS_JUMP) {
11282             gen_a64_set_pc_im(dc->pc);
11283         }
11284         if (cs->singlestep_enabled) {
11285             gen_exception_internal(EXCP_DEBUG);
11286         } else {
11287             gen_step_complete_exception(dc);
11288         }
11289     } else {
11290         switch (dc->is_jmp) {
11291         case DISAS_NEXT:
11292             gen_goto_tb(dc, 1, dc->pc);
11293             break;
11294         default:
11295         case DISAS_UPDATE:
11296             gen_a64_set_pc_im(dc->pc);
11297             /* fall through */
11298         case DISAS_JUMP:
11299             /* indicate that the hash table must be used to find the next TB */
11300             tcg_gen_exit_tb(0);
11301             break;
11302         case DISAS_TB_JUMP:
11303         case DISAS_EXC:
11304         case DISAS_SWI:
11305             break;
11306         case DISAS_WFE:
11307             gen_a64_set_pc_im(dc->pc);
11308             gen_helper_wfe(cpu_env);
11309             break;
11310         case DISAS_YIELD:
11311             gen_a64_set_pc_im(dc->pc);
11312             gen_helper_yield(cpu_env);
11313             break;
11314         case DISAS_WFI:
11315             /* This is a special case because we don't want to just halt the CPU
11316              * if trying to debug across a WFI.
11317              */
11318             gen_a64_set_pc_im(dc->pc);
11319             gen_helper_wfi(cpu_env);
11320             /* The helper doesn't necessarily throw an exception, but we
11321              * must go back to the main loop to check for interrupts anyway.
11322              */
11323             tcg_gen_exit_tb(0);
11324             break;
11325         }
11326     }
11327
11328 done_generating:
11329     gen_tb_end(tb, num_insns);
11330
11331 #ifdef DEBUG_DISAS
11332     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) &&
11333         qemu_log_in_addr_range(pc_start)) {
11334         qemu_log("----------------\n");
11335         qemu_log("IN: %s\n", lookup_symbol(pc_start));
11336         log_target_disas(cs, pc_start, dc->pc - pc_start,
11337                          4 | (bswap_code(dc->sctlr_b) ? 2 : 0));
11338         qemu_log("\n");
11339     }
11340 #endif
11341     tb->size = dc->pc - pc_start;
11342     tb->icount = num_insns;
11343 }