target/arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "qemu/log.h"
  26 #include "arm_ldst.h"
  27 #include "translate.h"
  28 #include "internals.h"
  29 #include "qemu/host-utils.h"
  30
  31 #include "exec/semihost.h"
  32 #include "exec/gen-icount.h"
  33
  34 #include "exec/helper-proto.h"
  35 #include "exec/helper-gen.h"
  36 #include "exec/log.h"
  37
  38 #include "trace-tcg.h"
  39 #include "translate-a64.h"
  40
/* TCG globals for the general-purpose registers; a64_translate_init()
 * binds cpu_X[i] to CPUARMState.xregs[i].
 */
static TCGv_i64 cpu_X[32];
/* TCG global bound to CPUARMState.pc by a64_translate_init(). */
static TCGv_i64 cpu_pc;

/* Load/store exclusive handling */
/* TCG global bound to CPUARMState.exclusive_high by a64_translate_init(). */
static TCGv_i64 cpu_exclusive_high;
  46
  47 static const char *regnames[] = {
  48     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  49     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  50     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  51     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  52 };
  53
/* Shift kinds, numbered 0..3 in the order LSL, LSR, ASR, ROR.
 * NOTE(review): the explicit values presumably mirror the 2-bit shift-type
 * field of the instruction encodings -- confirm against the decoder.
 */
enum a64_shift_type {
    A64_SHIFT_TYPE_LSL = 0,
    A64_SHIFT_TYPE_LSR = 1,
    A64_SHIFT_TYPE_ASR = 2,
    A64_SHIFT_TYPE_ROR = 3
};
  60
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */
/* Handler invoked with the translation context and the 32-bit instruction. */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One decode table entry.
 * NOTE(review): the lookup code is not visible here; an entry presumably
 * matches when (insn & mask) == pattern -- confirm against the table walker.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;           /* bit pattern for this entry */
    uint32_t mask;              /* bit mask for this entry */
    AArch64DecodeFn *disas_fn;  /* handler for this entry */
} AArch64DecodeTable;
  71
/* Function prototype for gen_ functions for calling Neon helpers */
/* The "Env" variants take a TCGv_ptr as their second argument. */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
/* Narrow/widen variants mix 32-bit and 64-bit TCG values. */
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
/* These two take a trailing TCGv_ptr.
 * NOTE(review): presumably a float status pointer such as the one returned
 * by get_fpstatus_ptr() -- confirm at the call sites.
 */
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
/* Crypto helper generators take only pointer operands (plus one i32). */
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
/* Atomic op generator: three 64-bit values, a TCGArg and a memory op. */
typedef void AtomicThreeOpFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGArg, TCGMemOp);
  88
  89 /* initialize TCG globals.  */
  90 void a64_translate_init(void)
  91 {
  92     int i;
  93
  94     cpu_pc = tcg_global_mem_new_i64(cpu_env,
  95                                     offsetof(CPUARMState, pc),
  96                                     "pc");
  97     for (i = 0; i < 32; i++) {
  98         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
  99                                           offsetof(CPUARMState, xregs[i]),
 100                                           regnames[i]);
 101     }
 102
 103     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
 104         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 105 }
 106
 107 static inline int get_a64_user_mem_index(DisasContext *s)
 108 {
 109     /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
 110      *  if EL1, access as if EL0; otherwise access at current EL
 111      */
 112     ARMMMUIdx useridx;
 113
 114     switch (s->mmu_idx) {
 115     case ARMMMUIdx_S12NSE1:
 116         useridx = ARMMMUIdx_S12NSE0;
 117         break;
 118     case ARMMMUIdx_S1SE1:
 119         useridx = ARMMMUIdx_S1SE0;
 120         break;
 121     case ARMMMUIdx_S2NS:
 122         g_assert_not_reached();
 123     default:
 124         useridx = s->mmu_idx;
 125         break;
 126     }
 127     return arm_to_core_mmu_idx(useridx);
 128 }
 129
 130 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 131                             fprintf_function cpu_fprintf, int flags)
 132 {
 133     ARMCPU *cpu = ARM_CPU(cs);
 134     CPUARMState *env = &cpu->env;
 135     uint32_t psr = pstate_read(env);
 136     int i;
 137     int el = arm_current_el(env);
 138     const char *ns_status;
 139
 140     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 141             env->pc, env->xregs[31]);
 142     for (i = 0; i < 31; i++) {
 143         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 144         if ((i % 4) == 3) {
 145             cpu_fprintf(f, "\n");
 146         } else {
 147             cpu_fprintf(f, " ");
 148         }
 149     }
 150
 151     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 152         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 153     } else {
 154         ns_status = "";
 155     }
 156
 157     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 158                 psr,
 159                 psr & PSTATE_N ? 'N' : '-',
 160                 psr & PSTATE_Z ? 'Z' : '-',
 161                 psr & PSTATE_C ? 'C' : '-',
 162                 psr & PSTATE_V ? 'V' : '-',
 163                 ns_status,
 164                 el,
 165                 psr & PSTATE_SP ? 'h' : 't');
 166
 167     if (flags & CPU_DUMP_FPU) {
 168         int numvfpregs = 32;
 169         for (i = 0; i < numvfpregs; i++) {
 170             uint64_t *q = aa64_vfp_qreg(env, i);
 171             uint64_t vlo = q[0];
 172             uint64_t vhi = q[1];
 173             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "%c",
 174                         i, vhi, vlo, (i & 1 ? '\n' : ' '));
 175         }
 176         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 177                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 178     }
 179 }
 180
/* Emit a TCG op that loads the immediate @val into the cpu_pc global. */
void gen_a64_set_pc_im(uint64_t val)
{
    tcg_gen_movi_i64(cpu_pc, val);
}
 185
 186 /* Load the PC from a generic TCG variable.
 187  *
 * If address tagging is enabled via the TCR TBI bits, then loading
 * an address into the PC will clear out any tag in it:
 *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 *    then the address is zero-extended, clearing bits [63:56]
 *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 *    and TBI1 controls addresses with bit 55 == 1.
 194  *    If the appropriate TBI bit is set for the address then
 195  *    the address is sign-extended from bit 55 into bits [63:56]
 196  *
 197  * We can avoid doing this for relative-branches, because the
 198  * PC + offset can never overflow into the tag bits (assuming
 199  * that virtual addresses are less than 56 bits wide, as they
 200  * are currently), but we must handle it for branch-to-register.
 201  */
 202 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
 203 {
 204
 205     if (s->current_el <= 1) {
 206         /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
 207          * examine bit 55 of address, can just generate code.
 208          * If mixed, then test via generated code
 209          */
 210         if (s->tbi0 && s->tbi1) {
 211             TCGv_i64 tmp_reg = tcg_temp_new_i64();
 212             /* Both bits set, sign extension from bit 55 into [63:56] will
 213              * cover both cases
 214              */
 215             tcg_gen_shli_i64(tmp_reg, src, 8);
 216             tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
 217             tcg_temp_free_i64(tmp_reg);
 218         } else if (!s->tbi0 && !s->tbi1) {
 219             /* Neither bit set, just load it as-is */
 220             tcg_gen_mov_i64(cpu_pc, src);
 221         } else {
 222             TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
 223             TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
 224             TCGv_i64 tcg_zero   = tcg_const_i64(0);
 225
 226             tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));
 227
 228             if (s->tbi0) {
 229                 /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
 230                 tcg_gen_andi_i64(tcg_tmpval, src,
 231                                  0x00FFFFFFFFFFFFFFull);
 232                 tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
 233                                     tcg_tmpval, src);
 234             } else {
 235                 /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
 236                 tcg_gen_ori_i64(tcg_tmpval, src,
 237                                 0xFF00000000000000ull);
 238                 tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
 239                                     tcg_tmpval, src);
 240             }
 241             tcg_temp_free_i64(tcg_zero);
 242             tcg_temp_free_i64(tcg_bit55);
 243             tcg_temp_free_i64(tcg_tmpval);
 244         }
 245     } else {  /* EL > 1 */
 246         if (s->tbi0) {
 247             /* Force tag byte to all zero */
 248             tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
 249         } else {
 250             /* Load unmodified address */
 251             tcg_gen_mov_i64(cpu_pc, src);
 252         }
 253     }
 254 }
 255
/* 64-bit comparison descriptor filled in by a64_test_cc(); its value
 * temporary is released by a64_free_cc().
 */
typedef struct DisasCompare64 {
    TCGCond cond;    /* condition copied from the 32-bit DisasCompare */
    TCGv_i64 value;  /* sign-extended copy of the 32-bit compare value */
} DisasCompare64;
 260
 261 static void a64_test_cc(DisasCompare64 *c64, int cc)
 262 {
 263     DisasCompare c32;
 264
 265     arm_test_cc(&c32, cc);
 266
 267     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 268        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 269     c64->cond = c32.cond;
 270     c64->value = tcg_temp_new_i64();
 271     tcg_gen_ext_i32_i64(c64->value, c32.value);
 272
 273     arm_free_cc(&c32);
 274 }
 275
/* Release the 64-bit temporary that a64_test_cc() stored in @c64->value. */
static void a64_free_cc(DisasCompare64 *c64)
{
    tcg_temp_free_i64(c64->value);
}
 280
 281 static void gen_exception_internal(int excp)
 282 {
 283     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 284
 285     assert(excp_is_internal(excp));
 286     gen_helper_exception_internal(cpu_env, tcg_excp);
 287     tcg_temp_free_i32(tcg_excp);
 288 }
 289
 290 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 291 {
 292     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 293     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 294     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 295
 296     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 297                                        tcg_syn, tcg_el);
 298     tcg_temp_free_i32(tcg_el);
 299     tcg_temp_free_i32(tcg_syn);
 300     tcg_temp_free_i32(tcg_excp);
 301 }
 302
 303 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 304 {
 305     gen_a64_set_pc_im(s->pc - offset);
 306     gen_exception_internal(excp);
 307     s->base.is_jmp = DISAS_NORETURN;
 308 }
 309
 310 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 311                                uint32_t syndrome, uint32_t target_el)
 312 {
 313     gen_a64_set_pc_im(s->pc - offset);
 314     gen_exception(excp, syndrome, target_el);
 315     s->base.is_jmp = DISAS_NORETURN;
 316 }
 317
 318 static void gen_exception_bkpt_insn(DisasContext *s, int offset,
 319                                     uint32_t syndrome)
 320 {
 321     TCGv_i32 tcg_syn;
 322
 323     gen_a64_set_pc_im(s->pc - offset);
 324     tcg_syn = tcg_const_i32(syndrome);
 325     gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
 326     tcg_temp_free_i32(tcg_syn);
 327     s->base.is_jmp = DISAS_NORETURN;
 328 }
 329
 330 static void gen_ss_advance(DisasContext *s)
 331 {
 332     /* If the singlestep state is Active-not-pending, advance to
 333      * Active-pending.
 334      */
 335     if (s->ss_active) {
 336         s->pstate_ss = 0;
 337         gen_helper_clear_pstate_ss(cpu_env);
 338     }
 339 }
 340
 341 static void gen_step_complete_exception(DisasContext *s)
 342 {
 343     /* We just completed step of an insn. Move from Active-not-pending
 344      * to Active-pending, and then also take the swstep exception.
 345      * This corresponds to making the (IMPDEF) choice to prioritize
 346      * swstep exceptions over asynchronous exceptions taken to an exception
 347      * level where debug is disabled. This choice has the advantage that
 348      * we do not need to maintain internal state corresponding to the
 349      * ISV/EX syndrome bits between completion of the step and generation
 350      * of the exception, and our syndrome information is always correct.
 351      */
 352     gen_ss_advance(s);
 353     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 354                   default_exception_el(s));
 355     s->base.is_jmp = DISAS_NORETURN;
 356 }
 357
 358 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 359 {
 360     /* No direct tb linking with singlestep (either QEMU's or the ARM
 361      * debug architecture kind) or deterministic io
 362      */
 363     if (s->base.singlestep_enabled || s->ss_active ||
 364         (tb_cflags(s->base.tb) & CF_LAST_IO)) {
 365         return false;
 366     }
 367
 368 #ifndef CONFIG_USER_ONLY
 369     /* Only link tbs from inside the same guest page */
 370     if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 371         return false;
 372     }
 373 #endif
 374
 375     return true;
 376 }
 377
 378 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 379 {
 380     TranslationBlock *tb;
 381
 382     tb = s->base.tb;
 383     if (use_goto_tb(s, n, dest)) {
 384         tcg_gen_goto_tb(n);
 385         gen_a64_set_pc_im(dest);
 386         tcg_gen_exit_tb(tb, n);
 387         s->base.is_jmp = DISAS_NORETURN;
 388     } else {
 389         gen_a64_set_pc_im(dest);
 390         if (s->ss_active) {
 391             gen_step_complete_exception(s);
 392         } else if (s->base.singlestep_enabled) {
 393             gen_exception_internal(EXCP_DEBUG);
 394         } else {
 395             tcg_gen_lookup_and_goto_ptr();
 396             s->base.is_jmp = DISAS_NORETURN;
 397         }
 398     }
 399 }
 400
 401 void unallocated_encoding(DisasContext *s)
 402 {
 403     /* Unallocated and reserved encodings are uncategorized */
 404     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
 405                        default_exception_el(s));
 406 }
 407
 408 static void init_tmp_a64_array(DisasContext *s)
 409 {
 410 #ifdef CONFIG_DEBUG_TCG
 411     memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
 412 #endif
 413     s->tmp_a64_count = 0;
 414 }
 415
 416 static void free_tmp_a64(DisasContext *s)
 417 {
 418     int i;
 419     for (i = 0; i < s->tmp_a64_count; i++) {
 420         tcg_temp_free_i64(s->tmp_a64[i]);
 421     }
 422     init_tmp_a64_array(s);
 423 }
 424
 425 TCGv_i64 new_tmp_a64(DisasContext *s)
 426 {
 427     assert(s->tmp_a64_count < TMP_A64_MAX);
 428     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 429 }
 430
 431 TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 432 {
 433     TCGv_i64 t = new_tmp_a64(s);
 434     tcg_gen_movi_i64(t, 0);
 435     return t;
 436 }
 437
 438 /*
 439  * Register access functions
 440  *
 * These functions are used for directly accessing a register where
 * changes to the final register value are likely to be made. If you
 * need to use a register for temporary calculation (e.g. index type
 * operations) use the read_* form.
 445  *
 446  * B1.2.1 Register mappings
 447  *
 448  * In instruction register encoding 31 can refer to ZR (zero register) or
 449  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 450  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 451  * This is the point of the _sp forms.
 452  */
 453 TCGv_i64 cpu_reg(DisasContext *s, int reg)
 454 {
 455     if (reg == 31) {
 456         return new_tmp_a64_zero(s);
 457     } else {
 458         return cpu_X[reg];
 459     }
 460 }
 461
/* register access for when 31 == SP */
/* Returns the TCG global cpu_X[reg] directly for every register number;
 * @s is not used.
 */
TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
{
    return cpu_X[reg];
}
 467
 468 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 469  * representing the register contents. This TCGv is an auto-freed
 470  * temporary so it need not be explicitly freed, and may be modified.
 471  */
 472 TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 473 {
 474     TCGv_i64 v = new_tmp_a64(s);
 475     if (reg != 31) {
 476         if (sf) {
 477             tcg_gen_mov_i64(v, cpu_X[reg]);
 478         } else {
 479             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 480         }
 481     } else {
 482         tcg_gen_movi_i64(v, 0);
 483     }
 484     return v;
 485 }
 486
 487 TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 488 {
 489     TCGv_i64 v = new_tmp_a64(s);
 490     if (sf) {
 491         tcg_gen_mov_i64(v, cpu_X[reg]);
 492     } else {
 493         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 494     }
 495     return v;
 496 }
 497
/* Return the offset into CPUARMState of a slice (from
 * the least significant end) of FP register Qn (ie
 * Dn, Sn, Hn or Bn).
 * (Note that this is not the same mapping as for A32; see cpu.h)
 *
 * Implemented as vec_reg_offset() with 0 as its third argument and
 * @size as the slice width.
 */
static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
{
    return vec_reg_offset(s, regno, 0, size);
}
 507
/* Offset of the high half of the 128 bit vector Qn */
/* Implemented as vec_reg_offset() with 1 as its third argument and MO_64. */
static inline int fp_reg_hi_offset(DisasContext *s, int regno)
{
    return vec_reg_offset(s, regno, 1, MO_64);
}
 513
 514 /* Convenience accessors for reading and writing single and double
 515  * FP registers. Writing clears the upper parts of the associated
 516  * 128 bit vector register, as required by the architecture.
 517  * Note that unlike the GP register accessors, the values returned
 518  * by the read functions must be manually freed.
 519  */
 520 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 521 {
 522     TCGv_i64 v = tcg_temp_new_i64();
 523
 524     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 525     return v;
 526 }
 527
 528 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 529 {
 530     TCGv_i32 v = tcg_temp_new_i32();
 531
 532     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 533     return v;
 534 }
 535
 536 static TCGv_i32 read_fp_hreg(DisasContext *s, int reg)
 537 {
 538     TCGv_i32 v = tcg_temp_new_i32();
 539
 540     tcg_gen_ld16u_i32(v, cpu_env, fp_reg_offset(s, reg, MO_16));
 541     return v;
 542 }
 543
 544 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 545  * If SVE is not enabled, then there are only 128 bits in the vector.
 546  */
 547 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
 548 {
 549     unsigned ofs = fp_reg_offset(s, rd, MO_64);
 550     unsigned vsz = vec_full_reg_size(s);
 551
 552     if (!is_q) {
 553         TCGv_i64 tcg_zero = tcg_const_i64(0);
 554         tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
 555         tcg_temp_free_i64(tcg_zero);
 556     }
 557     if (vsz > 16) {
 558         tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
 559     }
 560 }
 561
 562 void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 563 {
 564     unsigned ofs = fp_reg_offset(s, reg, MO_64);
 565
 566     tcg_gen_st_i64(v, cpu_env, ofs);
 567     clear_vec_high(s, false, reg);
 568 }
 569
 570 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 571 {
 572     TCGv_i64 tmp = tcg_temp_new_i64();
 573
 574     tcg_gen_extu_i32_i64(tmp, v);
 575     write_fp_dreg(s, reg, tmp);
 576     tcg_temp_free_i64(tmp);
 577 }
 578
 579 TCGv_ptr get_fpstatus_ptr(bool is_f16)
 580 {
 581     TCGv_ptr statusptr = tcg_temp_new_ptr();
 582     int offset;
 583
 584     /* In A64 all instructions (both FP and Neon) use the FPCR; there
 585      * is no equivalent of the A32 Neon "standard FPSCR value".
 586      * However half-precision operations operate under a different
 587      * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
 588      */
 589     if (is_f16) {
 590         offset = offsetof(CPUARMState, vfp.fp_status_f16);
 591     } else {
 592         offset = offsetof(CPUARMState, vfp.fp_status);
 593     }
 594     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 595     return statusptr;
 596 }
 597
 598 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
 599 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
 600                          GVecGen2Fn *gvec_fn, int vece)
 601 {
 602     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 603             is_q ? 16 : 8, vec_full_reg_size(s));
 604 }
 605
 606 /* Expand a 2-operand + immediate AdvSIMD vector operation using
 607  * an expander function.
 608  */
 609 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
 610                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
 611 {
 612     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 613             imm, is_q ? 16 : 8, vec_full_reg_size(s));
 614 }
 615
 616 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
 617 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
 618                          GVecGen3Fn *gvec_fn, int vece)
 619 {
 620     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 621             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
 622 }
 623
 624 /* Expand a 2-operand + immediate AdvSIMD vector operation using
 625  * an op descriptor.
 626  */
 627 static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
 628                           int rn, int64_t imm, const GVecGen2i *gvec_op)
 629 {
 630     tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 631                     is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
 632 }
 633
 634 /* Expand a 3-operand AdvSIMD vector operation using an op descriptor.  */
 635 static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
 636                          int rn, int rm, const GVecGen3 *gvec_op)
 637 {
 638     tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 639                    vec_full_reg_offset(s, rm), is_q ? 16 : 8,
 640                    vec_full_reg_size(s), gvec_op);
 641 }
 642
 643 /* Expand a 3-operand operation using an out-of-line helper.  */
 644 static void gen_gvec_op3_ool(DisasContext *s, bool is_q, int rd,
 645                              int rn, int rm, int data, gen_helper_gvec_3 *fn)
 646 {
 647     tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
 648                        vec_full_reg_offset(s, rn),
 649                        vec_full_reg_offset(s, rm),
 650                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 651 }
 652
 653 /* Expand a 3-operand + env pointer operation using
 654  * an out-of-line helper.
 655  */
 656 static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
 657                              int rn, int rm, gen_helper_gvec_3_ptr *fn)
 658 {
 659     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 660                        vec_full_reg_offset(s, rn),
 661                        vec_full_reg_offset(s, rm), cpu_env,
 662                        is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
 663 }
 664
 665 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
 666  * an out-of-line helper.
 667  */
 668 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
 669                               int rm, bool is_fp16, int data,
 670                               gen_helper_gvec_3_ptr *fn)
 671 {
 672     TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
 673     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 674                        vec_full_reg_offset(s, rn),
 675                        vec_full_reg_offset(s, rm), fpst,
 676                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 677     tcg_temp_free_ptr(fpst);
 678 }
 679
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 * than the 32 bit equivalent.
 *
 * The result is split into two 32-bit halves, one written to cpu_ZF and
 * one to cpu_NF, and cpu_ZF is then ORed with cpu_NF so that it is zero
 * only when the whole 64-bit result is zero.
 * NOTE(review): this relies on tcg_gen_extr_i64_i32() taking (low, high,
 * source), so that cpu_NF receives the high half -- confirm against the
 * TCG op documentation.
 */
static inline void gen_set_NZ64(TCGv_i64 result)
{
    tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
    tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
}
 688
 689 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 690 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 691 {
 692     if (sf) {
 693         gen_set_NZ64(result);
 694     } else {
 695         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 696         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 697     }
 698     tcg_gen_movi_i32(cpu_CF, 0);
 699     tcg_gen_movi_i32(cpu_VF, 0);
 700 }
 701
 702 /* dest = T0 + T1; compute C, N, V and Z flags */
 703 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 704 {
 705     if (sf) {
 706         TCGv_i64 result, flag, tmp;
 707         result = tcg_temp_new_i64();
 708         flag = tcg_temp_new_i64();
 709         tmp = tcg_temp_new_i64();
 710
 711         tcg_gen_movi_i64(tmp, 0);
 712         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 713
 714         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 715
 716         gen_set_NZ64(result);
 717
 718         tcg_gen_xor_i64(flag, result, t0);
 719         tcg_gen_xor_i64(tmp, t0, t1);
 720         tcg_gen_andc_i64(flag, flag, tmp);
 721         tcg_temp_free_i64(tmp);
 722         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 723
 724         tcg_gen_mov_i64(dest, result);
 725         tcg_temp_free_i64(result);
 726         tcg_temp_free_i64(flag);
 727     } else {
 728         /* 32 bit arithmetic */
 729         TCGv_i32 t0_32 = tcg_temp_new_i32();
 730         TCGv_i32 t1_32 = tcg_temp_new_i32();
 731         TCGv_i32 tmp = tcg_temp_new_i32();
 732
 733         tcg_gen_movi_i32(tmp, 0);
 734         tcg_gen_extrl_i64_i32(t0_32, t0);
 735         tcg_gen_extrl_i64_i32(t1_32, t1);
 736         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 737         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 738         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 739         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 740         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 741         tcg_gen_extu_i32_i64(dest, cpu_NF);
 742
 743         tcg_temp_free_i32(tmp);
 744         tcg_temp_free_i32(t0_32);
 745         tcg_temp_free_i32(t1_32);
 746     }
 747 }
 748
 749 /* dest = T0 - T1; compute C, N, V and Z flags */
 750 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 751 {
 752     if (sf) {
 753         /* 64 bit arithmetic */
 754         TCGv_i64 result, flag, tmp;
 755
 756         result = tcg_temp_new_i64();
 757         flag = tcg_temp_new_i64();
 758         tcg_gen_sub_i64(result, t0, t1);
 759
 760         gen_set_NZ64(result);
 761
 762         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 763         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 764
 765         tcg_gen_xor_i64(flag, result, t0);
 766         tmp = tcg_temp_new_i64();
 767         tcg_gen_xor_i64(tmp, t0, t1);
 768         tcg_gen_and_i64(flag, flag, tmp);
 769         tcg_temp_free_i64(tmp);
 770         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 771         tcg_gen_mov_i64(dest, result);
 772         tcg_temp_free_i64(flag);
 773         tcg_temp_free_i64(result);
 774     } else {
 775         /* 32 bit arithmetic */
 776         TCGv_i32 t0_32 = tcg_temp_new_i32();
 777         TCGv_i32 t1_32 = tcg_temp_new_i32();
 778         TCGv_i32 tmp;
 779
 780         tcg_gen_extrl_i64_i32(t0_32, t0);
 781         tcg_gen_extrl_i64_i32(t1_32, t1);
 782         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 783         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 784         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 785         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 786         tmp = tcg_temp_new_i32();
 787         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 788         tcg_temp_free_i32(t0_32);
 789         tcg_temp_free_i32(t1_32);
 790         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 791         tcg_temp_free_i32(tmp);
 792         tcg_gen_extu_i32_i64(dest, cpu_NF);
 793     }
 794 }
 795
 796 /* dest = T0 + T1 + CF; do not compute flags. */
 797 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 798 {
 799     TCGv_i64 flag = tcg_temp_new_i64();
 800     tcg_gen_extu_i32_i64(flag, cpu_CF);
 801     tcg_gen_add_i64(dest, t0, t1);
 802     tcg_gen_add_i64(dest, dest, flag);
 803     tcg_temp_free_i64(flag);
 804
 805     if (!sf) {
 806         tcg_gen_ext32u_i64(dest, dest);
 807     }
 808 }
 809
 810 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 811 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 812 {
 813     if (sf) {
 814         TCGv_i64 result, cf_64, vf_64, tmp;
 815         result = tcg_temp_new_i64();
 816         cf_64 = tcg_temp_new_i64();
 817         vf_64 = tcg_temp_new_i64();
 818         tmp = tcg_const_i64(0);
 819
 820         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 821         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 822         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 823         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 824         gen_set_NZ64(result);
 825
 826         tcg_gen_xor_i64(vf_64, result, t0);
 827         tcg_gen_xor_i64(tmp, t0, t1);
 828         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 829         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 830
 831         tcg_gen_mov_i64(dest, result);
 832
 833         tcg_temp_free_i64(tmp);
 834         tcg_temp_free_i64(vf_64);
 835         tcg_temp_free_i64(cf_64);
 836         tcg_temp_free_i64(result);
 837     } else {
 838         TCGv_i32 t0_32, t1_32, tmp;
 839         t0_32 = tcg_temp_new_i32();
 840         t1_32 = tcg_temp_new_i32();
 841         tmp = tcg_const_i32(0);
 842
 843         tcg_gen_extrl_i64_i32(t0_32, t0);
 844         tcg_gen_extrl_i64_i32(t1_32, t1);
 845         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 846         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 847
 848         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 849         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 850         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 851         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 852         tcg_gen_extu_i32_i64(dest, cpu_NF);
 853
 854         tcg_temp_free_i32(tmp);
 855         tcg_temp_free_i32(t1_32);
 856         tcg_temp_free_i32(t0_32);
 857     }
 858 }
 859
 860 /*
 861  * Load/Store generators
 862  */
 863
 864 /*
 865  * Store from GPR register to memory.
 866  */
 867 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 868                              TCGv_i64 tcg_addr, int size, int memidx,
 869                              bool iss_valid,
 870                              unsigned int iss_srt,
 871                              bool iss_sf, bool iss_ar)
 872 {
 873     g_assert(size <= 3);
 874     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 875
 876     if (iss_valid) {
 877         uint32_t syn;
 878
 879         syn = syn_data_abort_with_iss(0,
 880                                       size,
 881                                       false,
 882                                       iss_srt,
 883                                       iss_sf,
 884                                       iss_ar,
 885                                       0, 0, 0, 0, 0, false);
 886         disas_set_insn_syndrome(s, syn);
 887     }
 888 }
 889
 890 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 891                       TCGv_i64 tcg_addr, int size,
 892                       bool iss_valid,
 893                       unsigned int iss_srt,
 894                       bool iss_sf, bool iss_ar)
 895 {
 896     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
 897                      iss_valid, iss_srt, iss_sf, iss_ar);
 898 }
 899
 900 /*
 901  * Load from memory to GPR register
 902  */
 903 static void do_gpr_ld_memidx(DisasContext *s,
 904                              TCGv_i64 dest, TCGv_i64 tcg_addr,
 905                              int size, bool is_signed,
 906                              bool extend, int memidx,
 907                              bool iss_valid, unsigned int iss_srt,
 908                              bool iss_sf, bool iss_ar)
 909 {
 910     TCGMemOp memop = s->be_data + size;
 911
 912     g_assert(size <= 3);
 913
 914     if (is_signed) {
 915         memop += MO_SIGN;
 916     }
 917
 918     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 919
 920     if (extend && is_signed) {
 921         g_assert(size < 3);
 922         tcg_gen_ext32u_i64(dest, dest);
 923     }
 924
 925     if (iss_valid) {
 926         uint32_t syn;
 927
 928         syn = syn_data_abort_with_iss(0,
 929                                       size,
 930                                       is_signed,
 931                                       iss_srt,
 932                                       iss_sf,
 933                                       iss_ar,
 934                                       0, 0, 0, 0, 0, false);
 935         disas_set_insn_syndrome(s, syn);
 936     }
 937 }
 938
 939 static void do_gpr_ld(DisasContext *s,
 940                       TCGv_i64 dest, TCGv_i64 tcg_addr,
 941                       int size, bool is_signed, bool extend,
 942                       bool iss_valid, unsigned int iss_srt,
 943                       bool iss_sf, bool iss_ar)
 944 {
 945     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 946                      get_mem_index(s),
 947                      iss_valid, iss_srt, iss_sf, iss_ar);
 948 }
 949
 950 /*
 951  * Store from FP register to memory
 952  */
 953 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
 954 {
 955     /* This writes the bottom N bits of a 128 bit wide vector to memory */
 956     TCGv_i64 tmp = tcg_temp_new_i64();
 957     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
 958     if (size < 4) {
 959         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
 960                             s->be_data + size);
 961     } else {
 962         bool be = s->be_data == MO_BE;
 963         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
 964
 965         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 966         tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 967                             s->be_data | MO_Q);
 968         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
 969         tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
 970                             s->be_data | MO_Q);
 971         tcg_temp_free_i64(tcg_hiaddr);
 972     }
 973
 974     tcg_temp_free_i64(tmp);
 975 }
 976
 977 /*
 978  * Load from memory to FP register
 979  */
 980 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
 981 {
 982     /* This always zero-extends and writes to a full 128 bit wide vector */
 983     TCGv_i64 tmplo = tcg_temp_new_i64();
 984     TCGv_i64 tmphi;
 985
 986     if (size < 4) {
 987         TCGMemOp memop = s->be_data + size;
 988         tmphi = tcg_const_i64(0);
 989         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
 990     } else {
 991         bool be = s->be_data == MO_BE;
 992         TCGv_i64 tcg_hiaddr;
 993
 994         tmphi = tcg_temp_new_i64();
 995         tcg_hiaddr = tcg_temp_new_i64();
 996
 997         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
 998         tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
 999                             s->be_data | MO_Q);
1000         tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
1001                             s->be_data | MO_Q);
1002         tcg_temp_free_i64(tcg_hiaddr);
1003     }
1004
1005     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
1006     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
1007
1008     tcg_temp_free_i64(tmplo);
1009     tcg_temp_free_i64(tmphi);
1010
1011     clear_vec_high(s, true, destidx);
1012 }
1013
1014 /*
1015  * Vector load/store helpers.
1016  *
1017  * The principal difference between this and a FP load is that we don't
1018  * zero extend as we are filling a partial chunk of the vector register.
1019  * These functions don't support 128 bit loads/stores, which would be
1020  * normal load/store operations.
1021  *
1022  * The _i32 versions are useful when operating on 32 bit quantities
1023  * (eg for floating point single or using Neon helper functions).
1024  */
1025
1026 /* Get value of an element within a vector register */
1027 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1028                              int element, TCGMemOp memop)
1029 {
1030     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1031     switch (memop) {
1032     case MO_8:
1033         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
1034         break;
1035     case MO_16:
1036         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
1037         break;
1038     case MO_32:
1039         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
1040         break;
1041     case MO_8|MO_SIGN:
1042         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1043         break;
1044     case MO_16|MO_SIGN:
1045         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1046         break;
1047     case MO_32|MO_SIGN:
1048         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1049         break;
1050     case MO_64:
1051     case MO_64|MO_SIGN:
1052         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1053         break;
1054     default:
1055         g_assert_not_reached();
1056     }
1057 }
1058
1059 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1060                                  int element, TCGMemOp memop)
1061 {
1062     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1063     switch (memop) {
1064     case MO_8:
1065         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1066         break;
1067     case MO_16:
1068         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1069         break;
1070     case MO_8|MO_SIGN:
1071         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1072         break;
1073     case MO_16|MO_SIGN:
1074         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1075         break;
1076     case MO_32:
1077     case MO_32|MO_SIGN:
1078         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1079         break;
1080     default:
1081         g_assert_not_reached();
1082     }
1083 }
1084
1085 /* Set value of an element within a vector register */
1086 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1087                               int element, TCGMemOp memop)
1088 {
1089     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1090     switch (memop) {
1091     case MO_8:
1092         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1093         break;
1094     case MO_16:
1095         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1096         break;
1097     case MO_32:
1098         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1099         break;
1100     case MO_64:
1101         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1102         break;
1103     default:
1104         g_assert_not_reached();
1105     }
1106 }
1107
1108 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1109                                   int destidx, int element, TCGMemOp memop)
1110 {
1111     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1112     switch (memop) {
1113     case MO_8:
1114         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1115         break;
1116     case MO_16:
1117         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1118         break;
1119     case MO_32:
1120         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1121         break;
1122     default:
1123         g_assert_not_reached();
1124     }
1125 }
1126
1127 /* Store from vector register to memory */
1128 static void do_vec_st(DisasContext *s, int srcidx, int element,
1129                       TCGv_i64 tcg_addr, int size)
1130 {
1131     TCGMemOp memop = s->be_data + size;
1132     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1133
1134     read_vec_element(s, tcg_tmp, srcidx, element, size);
1135     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1136
1137     tcg_temp_free_i64(tcg_tmp);
1138 }
1139
1140 /* Load from memory to vector register */
1141 static void do_vec_ld(DisasContext *s, int destidx, int element,
1142                       TCGv_i64 tcg_addr, int size)
1143 {
1144     TCGMemOp memop = s->be_data + size;
1145     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1146
1147     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1148     write_vec_element(s, tcg_tmp, destidx, element, size);
1149
1150     tcg_temp_free_i64(tcg_tmp);
1151 }
1152
1153 /* Check that FP/Neon access is enabled. If it is, return
1154  * true. If not, emit code to generate an appropriate exception,
1155  * and return false; the caller should not emit any code for
1156  * the instruction. Note that this check must happen after all
1157  * unallocated-encoding checks (otherwise the syndrome information
1158  * for the resulting exception will be incorrect).
1159  */
1160 static inline bool fp_access_check(DisasContext *s)
1161 {
1162     assert(!s->fp_access_checked);
1163     s->fp_access_checked = true;
1164
1165     if (!s->fp_excp_el) {
1166         return true;
1167     }
1168
1169     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1170                        s->fp_excp_el);
1171     return false;
1172 }
1173
1174 /* Check that SVE access is enabled.  If it is, return true.
1175  * If not, emit code to generate an appropriate exception and return false.
1176  */
1177 bool sve_access_check(DisasContext *s)
1178 {
1179     if (s->sve_excp_el) {
1180         gen_exception_insn(s, 4, EXCP_UDEF, syn_sve_access_trap(),
1181                            s->sve_excp_el);
1182         return false;
1183     }
1184     return fp_access_check(s);
1185 }
1186
1187 /*
1188  * This utility function is for doing register extension with an
1189  * optional shift. You will likely want to pass a temporary for the
1190  * destination register. See DecodeRegExtend() in the ARM ARM.
1191  */
1192 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1193                               int option, unsigned int shift)
1194 {
1195     int extsize = extract32(option, 0, 2);
1196     bool is_signed = extract32(option, 2, 1);
1197
1198     if (is_signed) {
1199         switch (extsize) {
1200         case 0:
1201             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1202             break;
1203         case 1:
1204             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1205             break;
1206         case 2:
1207             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1208             break;
1209         case 3:
1210             tcg_gen_mov_i64(tcg_out, tcg_in);
1211             break;
1212         }
1213     } else {
1214         switch (extsize) {
1215         case 0:
1216             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1217             break;
1218         case 1:
1219             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1220             break;
1221         case 2:
1222             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1223             break;
1224         case 3:
1225             tcg_gen_mov_i64(tcg_out, tcg_in);
1226             break;
1227         }
1228     }
1229
1230     if (shift) {
1231         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1232     }
1233 }
1234
1235 static inline void gen_check_sp_alignment(DisasContext *s)
1236 {
1237     /* The AArch64 architecture mandates that (if enabled via PSTATE
1238      * or SCTLR bits) there is a check that SP is 16-aligned on every
1239      * SP-relative load or store (with an exception generated if it is not).
1240      * In line with general QEMU practice regarding misaligned accesses,
1241      * we omit these checks for the sake of guest program performance.
1242      * This function is provided as a hook so we can more easily add these
1243      * checks in future (possibly as a "favour catching guest program bugs
1244      * over speed" user selectable option).
1245      */
1246 }
1247
1248 /*
1249  * This provides a simple table based table lookup decoder. It is
1250  * intended to be used when the relevant bits for decode are too
1251  * awkwardly placed and switch/if based logic would be confusing and
1252  * deeply nested. Since it's a linear search through the table, tables
1253  * should be kept small.
1254  *
1255  * It returns the first handler where insn & mask == pattern, or
1256  * NULL if there is no match.
1257  * The table is terminated by an empty mask (i.e. 0)
1258  */
1259 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1260                                                uint32_t insn)
1261 {
1262     const AArch64DecodeTable *tptr = table;
1263
1264     while (tptr->mask) {
1265         if ((insn & tptr->mask) == tptr->pattern) {
1266             return tptr->disas_fn;
1267         }
1268         tptr++;
1269     }
1270     return NULL;
1271 }
1272
1273 /*
1274  * The instruction disassembly implemented here matches
1275  * the instruction encoding classifications in chapter C4
1276  * of the ARM Architecture Reference Manual (DDI0487B_a);
1277  * classification names and decode diagrams here should generally
1278  * match up with those in the manual.
1279  */
1280
1281 /* Unconditional branch (immediate)
1282  *   31  30       26 25                                  0
1283  * +----+-----------+-------------------------------------+
1284  * | op | 0 0 1 0 1 |                 imm26               |
1285  * +----+-----------+-------------------------------------+
1286  */
1287 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1288 {
1289     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1290
1291     if (insn & (1U << 31)) {
1292         /* BL Branch with link */
1293         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1294     }
1295
1296     /* B Branch / BL Branch with link */
1297     gen_goto_tb(s, 0, addr);
1298 }
1299
1300 /* Compare and branch (immediate)
1301  *   31  30         25  24  23                  5 4      0
1302  * +----+-------------+----+---------------------+--------+
1303  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1304  * +----+-------------+----+---------------------+--------+
1305  */
1306 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1307 {
1308     unsigned int sf, op, rt;
1309     uint64_t addr;
1310     TCGLabel *label_match;
1311     TCGv_i64 tcg_cmp;
1312
1313     sf = extract32(insn, 31, 1);
1314     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1315     rt = extract32(insn, 0, 5);
1316     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1317
1318     tcg_cmp = read_cpu_reg(s, rt, sf);
1319     label_match = gen_new_label();
1320
1321     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1322                         tcg_cmp, 0, label_match);
1323
1324     gen_goto_tb(s, 0, s->pc);
1325     gen_set_label(label_match);
1326     gen_goto_tb(s, 1, addr);
1327 }
1328
1329 /* Test and branch (immediate)
1330  *   31  30         25  24  23   19 18          5 4    0
1331  * +----+-------------+----+-------+-------------+------+
1332  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1333  * +----+-------------+----+-------+-------------+------+
1334  */
1335 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1336 {
1337     unsigned int bit_pos, op, rt;
1338     uint64_t addr;
1339     TCGLabel *label_match;
1340     TCGv_i64 tcg_cmp;
1341
1342     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1343     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1344     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1345     rt = extract32(insn, 0, 5);
1346
1347     tcg_cmp = tcg_temp_new_i64();
1348     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1349     label_match = gen_new_label();
1350     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1351                         tcg_cmp, 0, label_match);
1352     tcg_temp_free_i64(tcg_cmp);
1353     gen_goto_tb(s, 0, s->pc);
1354     gen_set_label(label_match);
1355     gen_goto_tb(s, 1, addr);
1356 }
1357
1358 /* Conditional branch (immediate)
1359  *  31           25  24  23                  5   4  3    0
1360  * +---------------+----+---------------------+----+------+
1361  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1362  * +---------------+----+---------------------+----+------+
1363  */
1364 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1365 {
1366     unsigned int cond;
1367     uint64_t addr;
1368
1369     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1370         unallocated_encoding(s);
1371         return;
1372     }
1373     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1374     cond = extract32(insn, 0, 4);
1375
1376     if (cond < 0x0e) {
1377         /* genuinely conditional branches */
1378         TCGLabel *label_match = gen_new_label();
1379         arm_gen_test_cc(cond, label_match);
1380         gen_goto_tb(s, 0, s->pc);
1381         gen_set_label(label_match);
1382         gen_goto_tb(s, 1, addr);
1383     } else {
1384         /* 0xe and 0xf are both "always" conditions */
1385         gen_goto_tb(s, 0, addr);
1386     }
1387 }
1388
1389 /* HINT instruction group, including various allocated HINTs */
1390 static void handle_hint(DisasContext *s, uint32_t insn,
1391                         unsigned int op1, unsigned int op2, unsigned int crm)
1392 {
1393     unsigned int selector = crm << 3 | op2;
1394
1395     if (op1 != 3) {
1396         unallocated_encoding(s);
1397         return;
1398     }
1399
1400     switch (selector) {
1401     case 0: /* NOP */
1402         return;
1403     case 3: /* WFI */
1404         s->base.is_jmp = DISAS_WFI;
1405         return;
1406         /* When running in MTTCG we don't generate jumps to the yield and
1407          * WFE helpers as it won't affect the scheduling of other vCPUs.
1408          * If we wanted to more completely model WFE/SEV so we don't busy
1409          * spin unnecessarily we would need to do something more involved.
1410          */
1411     case 1: /* YIELD */
1412         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1413             s->base.is_jmp = DISAS_YIELD;
1414         }
1415         return;
1416     case 2: /* WFE */
1417         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1418             s->base.is_jmp = DISAS_WFE;
1419         }
1420         return;
1421     case 4: /* SEV */
1422     case 5: /* SEVL */
1423         /* we treat all as NOP at least for now */
1424         return;
1425     default:
1426         /* default specified as NOP equivalent */
1427         return;
1428     }
1429 }
1430
1431 static void gen_clrex(DisasContext *s, uint32_t insn)
1432 {
1433     tcg_gen_movi_i64(cpu_exclusive_addr, -1);
1434 }
1435
1436 /* CLREX, DSB, DMB, ISB */
1437 static void handle_sync(DisasContext *s, uint32_t insn,
1438                         unsigned int op1, unsigned int op2, unsigned int crm)
1439 {
1440     TCGBar bar;
1441
1442     if (op1 != 3) {
1443         unallocated_encoding(s);
1444         return;
1445     }
1446
1447     switch (op2) {
1448     case 2: /* CLREX */
1449         gen_clrex(s, insn);
1450         return;
1451     case 4: /* DSB */
1452     case 5: /* DMB */
1453         switch (crm & 3) {
1454         case 1: /* MBReqTypes_Reads */
1455             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1456             break;
1457         case 2: /* MBReqTypes_Writes */
1458             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1459             break;
1460         default: /* MBReqTypes_All */
1461             bar = TCG_BAR_SC | TCG_MO_ALL;
1462             break;
1463         }
1464         tcg_gen_mb(bar);
1465         return;
1466     case 6: /* ISB */
1467         /* We need to break the TB after this insn to execute
1468          * a self-modified code correctly and also to take
1469          * any pending interrupts immediately.
1470          */
1471         gen_goto_tb(s, 0, s->pc);
1472         return;
1473     default:
1474         unallocated_encoding(s);
1475         return;
1476     }
1477 }
1478
1479 /* MSR (immediate) - move immediate to processor state field */
1480 static void handle_msr_i(DisasContext *s, uint32_t insn,
1481                          unsigned int op1, unsigned int op2, unsigned int crm)
1482 {
1483     int op = op1 << 3 | op2;
1484     switch (op) {
1485     case 0x05: /* SPSel */
1486         if (s->current_el == 0) {
1487             unallocated_encoding(s);
1488             return;
1489         }
1490         /* fall through */
1491     case 0x1e: /* DAIFSet */
1492     case 0x1f: /* DAIFClear */
1493     {
1494         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1495         TCGv_i32 tcg_op = tcg_const_i32(op);
1496         gen_a64_set_pc_im(s->pc - 4);
1497         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1498         tcg_temp_free_i32(tcg_imm);
1499         tcg_temp_free_i32(tcg_op);
1500         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1501         gen_a64_set_pc_im(s->pc);
1502         s->base.is_jmp = (op == 0x1f ? DISAS_EXIT : DISAS_JUMP);
1503         break;
1504     }
1505     default:
1506         unallocated_encoding(s);
1507         return;
1508     }
1509 }
1510
1511 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1512 {
1513     TCGv_i32 tmp = tcg_temp_new_i32();
1514     TCGv_i32 nzcv = tcg_temp_new_i32();
1515
1516     /* build bit 31, N */
1517     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1518     /* build bit 30, Z */
1519     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1520     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1521     /* build bit 29, C */
1522     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1523     /* build bit 28, V */
1524     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1525     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1526     /* generate result */
1527     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1528
1529     tcg_temp_free_i32(nzcv);
1530     tcg_temp_free_i32(tmp);
1531 }
1532
1533 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1534
1535 {
1536     TCGv_i32 nzcv = tcg_temp_new_i32();
1537
1538     /* take NZCV from R[t] */
1539     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1540
1541     /* bit 31, N */
1542     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1543     /* bit 30, Z */
1544     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1545     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1546     /* bit 29, C */
1547     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1548     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1549     /* bit 28, V */
1550     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1551     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1552     tcg_temp_free_i32(nzcv);
1553 }
1554
1555 /* MRS - move from system register
1556  * MSR (register) - move to system register
1557  * SYS
1558  * SYSL
1559  * These are all essentially the same insn in 'read' and 'write'
1560  * versions, with varying op0 fields.
1561  */
1562 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1563                        unsigned int op0, unsigned int op1, unsigned int op2,
1564                        unsigned int crn, unsigned int crm, unsigned int rt)
1565 {
1566     const ARMCPRegInfo *ri;
1567     TCGv_i64 tcg_rt;
1568
1569     ri = get_arm_cp_reginfo(s->cp_regs,
1570                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1571                                                crn, crm, op0, op1, op2));
1572
1573     if (!ri) {
1574         /* Unknown register; this might be a guest error or a QEMU
1575          * unimplemented feature.
1576          */
1577         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1578                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1579                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1580         unallocated_encoding(s);
1581         return;
1582     }
1583
1584     /* Check access permissions */
1585     if (!cp_access_ok(s->current_el, ri, isread)) {
1586         unallocated_encoding(s);
1587         return;
1588     }
1589
1590     if (ri->accessfn) {
1591         /* Emit code to perform further access permissions checks at
1592          * runtime; this may result in an exception.
1593          */
1594         TCGv_ptr tmpptr;
1595         TCGv_i32 tcg_syn, tcg_isread;
1596         uint32_t syndrome;
1597
1598         gen_a64_set_pc_im(s->pc - 4);
1599         tmpptr = tcg_const_ptr(ri);
1600         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1601         tcg_syn = tcg_const_i32(syndrome);
1602         tcg_isread = tcg_const_i32(isread);
1603         gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1604         tcg_temp_free_ptr(tmpptr);
1605         tcg_temp_free_i32(tcg_syn);
1606         tcg_temp_free_i32(tcg_isread);
1607     }
1608
1609     /* Handle special cases first */
1610     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1611     case ARM_CP_NOP:
1612         return;
1613     case ARM_CP_NZCV:
1614         tcg_rt = cpu_reg(s, rt);
1615         if (isread) {
1616             gen_get_nzcv(tcg_rt);
1617         } else {
1618             gen_set_nzcv(tcg_rt);
1619         }
1620         return;
1621     case ARM_CP_CURRENTEL:
1622         /* Reads as current EL value from pstate, which is
1623          * guaranteed to be constant by the tb flags.
1624          */
1625         tcg_rt = cpu_reg(s, rt);
1626         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1627         return;
1628     case ARM_CP_DC_ZVA:
1629         /* Writes clear the aligned block of memory which rt points into. */
1630         tcg_rt = cpu_reg(s, rt);
1631         gen_helper_dc_zva(cpu_env, tcg_rt);
1632         return;
1633     default:
1634         break;
1635     }
1636     if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1637         return;
1638     }
1639     if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1640         return;
1641     }
1642
1643     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1644         gen_io_start();
1645     }
1646
1647     tcg_rt = cpu_reg(s, rt);
1648
1649     if (isread) {
1650         if (ri->type & ARM_CP_CONST) {
1651             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1652         } else if (ri->readfn) {
1653             TCGv_ptr tmpptr;
1654             tmpptr = tcg_const_ptr(ri);
1655             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1656             tcg_temp_free_ptr(tmpptr);
1657         } else {
1658             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1659         }
1660     } else {
1661         if (ri->type & ARM_CP_CONST) {
1662             /* If not forbidden by access permissions, treat as WI */
1663             return;
1664         } else if (ri->writefn) {
1665             TCGv_ptr tmpptr;
1666             tmpptr = tcg_const_ptr(ri);
1667             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1668             tcg_temp_free_ptr(tmpptr);
1669         } else {
1670             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1671         }
1672     }
1673
1674     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1675         /* I/O operations must end the TB here (whether read or write) */
1676         gen_io_end();
1677         s->base.is_jmp = DISAS_UPDATE;
1678     } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1679         /* We default to ending the TB on a coprocessor register write,
1680          * but allow this to be suppressed by the register definition
1681          * (usually only necessary to work around guest bugs).
1682          */
1683         s->base.is_jmp = DISAS_UPDATE;
1684     }
1685 }
1686
1687 /* System
1688  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1689  * +---------------------+---+-----+-----+-------+-------+-----+------+
1690  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1691  * +---------------------+---+-----+-----+-------+-------+-----+------+
1692  */
1693 static void disas_system(DisasContext *s, uint32_t insn)
1694 {
1695     unsigned int l, op0, op1, crn, crm, op2, rt;
1696     l = extract32(insn, 21, 1);
1697     op0 = extract32(insn, 19, 2);
1698     op1 = extract32(insn, 16, 3);
1699     crn = extract32(insn, 12, 4);
1700     crm = extract32(insn, 8, 4);
1701     op2 = extract32(insn, 5, 3);
1702     rt = extract32(insn, 0, 5);
1703
1704     if (op0 == 0) {
1705         if (l || rt != 31) {
1706             unallocated_encoding(s);
1707             return;
1708         }
1709         switch (crn) {
1710         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1711             handle_hint(s, insn, op1, op2, crm);
1712             break;
1713         case 3: /* CLREX, DSB, DMB, ISB */
1714             handle_sync(s, insn, op1, op2, crm);
1715             break;
1716         case 4: /* MSR (immediate) */
1717             handle_msr_i(s, insn, op1, op2, crm);
1718             break;
1719         default:
1720             unallocated_encoding(s);
1721             break;
1722         }
1723         return;
1724     }
1725     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1726 }
1727
1728 /* Exception generation
1729  *
1730  *  31             24 23 21 20                     5 4   2 1  0
1731  * +-----------------+-----+------------------------+-----+----+
1732  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1733  * +-----------------------+------------------------+----------+
1734  */
1735 static void disas_exc(DisasContext *s, uint32_t insn)
1736 {
1737     int opc = extract32(insn, 21, 3);
1738     int op2_ll = extract32(insn, 0, 5);
1739     int imm16 = extract32(insn, 5, 16);
1740     TCGv_i32 tmp;
1741
1742     switch (opc) {
1743     case 0:
1744         /* For SVC, HVC and SMC we advance the single-step state
1745          * machine before taking the exception. This is architecturally
1746          * mandated, to ensure that single-stepping a system call
1747          * instruction works properly.
1748          */
1749         switch (op2_ll) {
1750         case 1:                                                     /* SVC */
1751             gen_ss_advance(s);
1752             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1753                                default_exception_el(s));
1754             break;
1755         case 2:                                                     /* HVC */
1756             if (s->current_el == 0) {
1757                 unallocated_encoding(s);
1758                 break;
1759             }
1760             /* The pre HVC helper handles cases when HVC gets trapped
1761              * as an undefined insn by runtime configuration.
1762              */
1763             gen_a64_set_pc_im(s->pc - 4);
1764             gen_helper_pre_hvc(cpu_env);
1765             gen_ss_advance(s);
1766             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1767             break;
1768         case 3:                                                     /* SMC */
1769             if (s->current_el == 0) {
1770                 unallocated_encoding(s);
1771                 break;
1772             }
1773             gen_a64_set_pc_im(s->pc - 4);
1774             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1775             gen_helper_pre_smc(cpu_env, tmp);
1776             tcg_temp_free_i32(tmp);
1777             gen_ss_advance(s);
1778             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1779             break;
1780         default:
1781             unallocated_encoding(s);
1782             break;
1783         }
1784         break;
1785     case 1:
1786         if (op2_ll != 0) {
1787             unallocated_encoding(s);
1788             break;
1789         }
1790         /* BRK */
1791         gen_exception_bkpt_insn(s, 4, syn_aa64_bkpt(imm16));
1792         break;
1793     case 2:
1794         if (op2_ll != 0) {
1795             unallocated_encoding(s);
1796             break;
1797         }
1798         /* HLT. This has two purposes.
1799          * Architecturally, it is an external halting debug instruction.
1800          * Since QEMU doesn't implement external debug, we treat this as
1801          * it is required for halting debug disabled: it will UNDEF.
1802          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1803          */
1804         if (semihosting_enabled() && imm16 == 0xf000) {
1805 #ifndef CONFIG_USER_ONLY
1806             /* In system mode, don't allow userspace access to semihosting,
1807              * to provide some semblance of security (and for consistency
1808              * with our 32-bit semihosting).
1809              */
1810             if (s->current_el == 0) {
1811                 unsupported_encoding(s, insn);
1812                 break;
1813             }
1814 #endif
1815             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1816         } else {
1817             unsupported_encoding(s, insn);
1818         }
1819         break;
1820     case 5:
1821         if (op2_ll < 1 || op2_ll > 3) {
1822             unallocated_encoding(s);
1823             break;
1824         }
1825         /* DCPS1, DCPS2, DCPS3 */
1826         unsupported_encoding(s, insn);
1827         break;
1828     default:
1829         unallocated_encoding(s);
1830         break;
1831     }
1832 }
1833
1834 /* Unconditional branch (register)
1835  *  31           25 24   21 20   16 15   10 9    5 4     0
1836  * +---------------+-------+-------+-------+------+-------+
1837  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1838  * +---------------+-------+-------+-------+------+-------+
1839  */
1840 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1841 {
1842     unsigned int opc, op2, op3, rn, op4;
1843
1844     opc = extract32(insn, 21, 4);
1845     op2 = extract32(insn, 16, 5);
1846     op3 = extract32(insn, 10, 6);
1847     rn = extract32(insn, 5, 5);
1848     op4 = extract32(insn, 0, 5);
1849
1850     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1851         unallocated_encoding(s);
1852         return;
1853     }
1854
1855     switch (opc) {
1856     case 0: /* BR */
1857     case 1: /* BLR */
1858     case 2: /* RET */
1859         gen_a64_set_pc(s, cpu_reg(s, rn));
1860         /* BLR also needs to load return address */
1861         if (opc == 1) {
1862             tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1863         }
1864         break;
1865     case 4: /* ERET */
1866         if (s->current_el == 0) {
1867             unallocated_encoding(s);
1868             return;
1869         }
1870         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
1871             gen_io_start();
1872         }
1873         gen_helper_exception_return(cpu_env);
1874         if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
1875             gen_io_end();
1876         }
1877         /* Must exit loop to check un-masked IRQs */
1878         s->base.is_jmp = DISAS_EXIT;
1879         return;
1880     case 5: /* DRPS */
1881         if (rn != 0x1f) {
1882             unallocated_encoding(s);
1883         } else {
1884             unsupported_encoding(s, insn);
1885         }
1886         return;
1887     default:
1888         unallocated_encoding(s);
1889         return;
1890     }
1891
1892     s->base.is_jmp = DISAS_JUMP;
1893 }
1894
1895 /* Branches, exception generating and system instructions */
1896 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1897 {
1898     switch (extract32(insn, 25, 7)) {
1899     case 0x0a: case 0x0b:
1900     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1901         disas_uncond_b_imm(s, insn);
1902         break;
1903     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1904         disas_comp_b_imm(s, insn);
1905         break;
1906     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1907         disas_test_b_imm(s, insn);
1908         break;
1909     case 0x2a: /* Conditional branch (immediate) */
1910         disas_cond_b_imm(s, insn);
1911         break;
1912     case 0x6a: /* Exception generation / System */
1913         if (insn & (1 << 24)) {
1914             disas_system(s, insn);
1915         } else {
1916             disas_exc(s, insn);
1917         }
1918         break;
1919     case 0x6b: /* Unconditional branch (register) */
1920         disas_uncond_b_reg(s, insn);
1921         break;
1922     default:
1923         unallocated_encoding(s);
1924         break;
1925     }
1926 }
1927
1928 /*
1929  * Load/Store exclusive instructions are implemented by remembering
1930  * the value/address loaded, and seeing if these are the same
1931  * when the store is performed. This is not actually the architecturally
1932  * mandated semantics, but it works for typical guest code sequences
1933  * and avoids having to monitor regular stores.
1934  *
1935  * The store exclusive uses the atomic cmpxchg primitives to avoid
1936  * races in multi-threaded linux-user and when MTTCG softmmu is
1937  * enabled.
1938  */
1939 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1940                                TCGv_i64 addr, int size, bool is_pair)
1941 {
1942     int idx = get_mem_index(s);
1943     TCGMemOp memop = s->be_data;
1944
1945     g_assert(size <= 3);
1946     if (is_pair) {
1947         g_assert(size >= 2);
1948         if (size == 2) {
1949             /* The pair must be single-copy atomic for the doubleword.  */
1950             memop |= MO_64 | MO_ALIGN;
1951             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1952             if (s->be_data == MO_LE) {
1953                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
1954                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
1955             } else {
1956                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
1957                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
1958             }
1959         } else {
1960             /* The pair must be single-copy atomic for *each* doubleword, not
1961                the entire quadword, however it must be quadword aligned.  */
1962             memop |= MO_64;
1963             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
1964                                 memop | MO_ALIGN_16);
1965
1966             TCGv_i64 addr2 = tcg_temp_new_i64();
1967             tcg_gen_addi_i64(addr2, addr, 8);
1968             tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
1969             tcg_temp_free_i64(addr2);
1970
1971             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
1972             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
1973         }
1974     } else {
1975         memop |= size | MO_ALIGN;
1976         tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1977         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
1978     }
1979     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
1980 }
1981
/*
 * Emit the store half of an exclusive load/store sequence.
 *
 * rd receives the status result, rt (and rt2 when is_pair is set) supply
 * the data to store, addr is the target address and size is log2 of the
 * element size in bytes.  The store is attempted only if addr matches
 * cpu_exclusive_addr; it is carried out as a compare-and-exchange against
 * the value remembered by the preceding exclusive load.  In all cases
 * cpu_exclusive_addr is reset to -1 afterwards.
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 tmp;

    /* An address mismatch fails immediately without touching memory. */
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    tmp = tcg_temp_new_i64();
    if (is_pair) {
        if (size == 2) {
            /* Pair of 32-bit words: pack Rt/Rt2 into one doubleword, in
             * the order dictated by the data endianness, and exchange it
             * with a single 64-bit cmpxchg.
             */
            if (s->be_data == MO_LE) {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
            } else {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
            }
            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
                                       cpu_exclusive_val, tmp,
                                       get_mem_index(s),
                                       MO_64 | MO_ALIGN | s->be_data);
            /* tmp = 1 if the old memory value differed (failure), else 0 */
            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
        } else if (s->be_data == MO_LE) {
            /* Pair of 64-bit doublewords: delegated to a helper, whose
             * result in tmp is later copied to Rd.  The _parallel variant
             * is used when the TB was translated with CF_PARALLEL.
             */
            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
                gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
                                                        cpu_exclusive_addr,
                                                        cpu_reg(s, rt),
                                                        cpu_reg(s, rt2));
            } else {
                gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
                                               cpu_reg(s, rt), cpu_reg(s, rt2));
            }
        } else {
            /* Big-endian counterpart of the 64-bit pair case above. */
            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
                gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
                                                        cpu_exclusive_addr,
                                                        cpu_reg(s, rt),
                                                        cpu_reg(s, rt2));
            } else {
                gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
                                               cpu_reg(s, rt), cpu_reg(s, rt2));
            }
        }
    } else {
        /* Single register: cmpxchg of Rt against the remembered value. */
        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
                                   cpu_reg(s, rt), get_mem_index(s),
                                   size | MO_ALIGN | s->be_data);
        /* tmp = 1 if the old memory value differed (failure), else 0 */
        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
    }
    /* Report the outcome computed above in Rd. */
    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
    tcg_temp_free_i64(tmp);
    tcg_gen_br(done_label);

    gen_set_label(fail_label);
    /* Address mismatch: report failure in Rd. */
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Both paths end by invalidating the recorded exclusive address. */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
2052
2053 static void gen_compare_and_swap(DisasContext *s, int rs, int rt,
2054                                  int rn, int size)
2055 {
2056     TCGv_i64 tcg_rs = cpu_reg(s, rs);
2057     TCGv_i64 tcg_rt = cpu_reg(s, rt);
2058     int memidx = get_mem_index(s);
2059     TCGv_i64 addr = cpu_reg_sp(s, rn);
2060
2061     if (rn == 31) {
2062         gen_check_sp_alignment(s);
2063     }
2064     tcg_gen_atomic_cmpxchg_i64(tcg_rs, addr, tcg_rs, tcg_rt, memidx,
2065                                size | MO_ALIGN | s->be_data);
2066 }
2067
2068 static void gen_compare_and_swap_pair(DisasContext *s, int rs, int rt,
2069                                       int rn, int size)
2070 {
2071     TCGv_i64 s1 = cpu_reg(s, rs);
2072     TCGv_i64 s2 = cpu_reg(s, rs + 1);
2073     TCGv_i64 t1 = cpu_reg(s, rt);
2074     TCGv_i64 t2 = cpu_reg(s, rt + 1);
2075     TCGv_i64 addr = cpu_reg_sp(s, rn);
2076     int memidx = get_mem_index(s);
2077
2078     if (rn == 31) {
2079         gen_check_sp_alignment(s);
2080     }
2081
2082     if (size == 2) {
2083         TCGv_i64 cmp = tcg_temp_new_i64();
2084         TCGv_i64 val = tcg_temp_new_i64();
2085
2086         if (s->be_data == MO_LE) {
2087             tcg_gen_concat32_i64(val, t1, t2);
2088             tcg_gen_concat32_i64(cmp, s1, s2);
2089         } else {
2090             tcg_gen_concat32_i64(val, t2, t1);
2091             tcg_gen_concat32_i64(cmp, s2, s1);
2092         }
2093
2094         tcg_gen_atomic_cmpxchg_i64(cmp, addr, cmp, val, memidx,
2095                                    MO_64 | MO_ALIGN | s->be_data);
2096         tcg_temp_free_i64(val);
2097
2098         if (s->be_data == MO_LE) {
2099             tcg_gen_extr32_i64(s1, s2, cmp);
2100         } else {
2101             tcg_gen_extr32_i64(s2, s1, cmp);
2102         }
2103         tcg_temp_free_i64(cmp);
2104     } else if (tb_cflags(s->base.tb) & CF_PARALLEL) {
2105         TCGv_i32 tcg_rs = tcg_const_i32(rs);
2106
2107         if (s->be_data == MO_LE) {
2108             gen_helper_casp_le_parallel(cpu_env, tcg_rs, addr, t1, t2);
2109         } else {
2110             gen_helper_casp_be_parallel(cpu_env, tcg_rs, addr, t1, t2);
2111         }
2112         tcg_temp_free_i32(tcg_rs);
2113     } else {
2114         TCGv_i64 d1 = tcg_temp_new_i64();
2115         TCGv_i64 d2 = tcg_temp_new_i64();
2116         TCGv_i64 a2 = tcg_temp_new_i64();
2117         TCGv_i64 c1 = tcg_temp_new_i64();
2118         TCGv_i64 c2 = tcg_temp_new_i64();
2119         TCGv_i64 zero = tcg_const_i64(0);
2120
2121         /* Load the two words, in memory order.  */
2122         tcg_gen_qemu_ld_i64(d1, addr, memidx,
2123                             MO_64 | MO_ALIGN_16 | s->be_data);
2124         tcg_gen_addi_i64(a2, addr, 8);
2125         tcg_gen_qemu_ld_i64(d2, addr, memidx, MO_64 | s->be_data);
2126
2127         /* Compare the two words, also in memory order.  */
2128         tcg_gen_setcond_i64(TCG_COND_EQ, c1, d1, s1);
2129         tcg_gen_setcond_i64(TCG_COND_EQ, c2, d2, s2);
2130         tcg_gen_and_i64(c2, c2, c1);
2131
2132         /* If compare equal, write back new data, else write back old data.  */
2133         tcg_gen_movcond_i64(TCG_COND_NE, c1, c2, zero, t1, d1);
2134         tcg_gen_movcond_i64(TCG_COND_NE, c2, c2, zero, t2, d2);
2135         tcg_gen_qemu_st_i64(c1, addr, memidx, MO_64 | s->be_data);
2136         tcg_gen_qemu_st_i64(c2, a2, memidx, MO_64 | s->be_data);
2137         tcg_temp_free_i64(a2);
2138         tcg_temp_free_i64(c1);
2139         tcg_temp_free_i64(c2);
2140         tcg_temp_free_i64(zero);
2141
2142         /* Write back the data from memory to Rs.  */
2143         tcg_gen_mov_i64(s1, d1);
2144         tcg_gen_mov_i64(s2, d2);
2145         tcg_temp_free_i64(d1);
2146         tcg_temp_free_i64(d2);
2147     }
2148 }
2149
/* Compute the Sixty-Four bit (SF) register-size flag for the ISS.  This
 * logic is derived from the ARMv8 specs for LDR (Shared decode for all
 * encodings).
 *
 * Returns true when the transfer register is 64 bits wide:
 *  - signed loads: opc<0> clear selects a 64-bit destination;
 *  - everything else: only size == 3 uses a 64-bit register.
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    if (is_signed) {
        return (opc & 1) == 0;
    }
    return size == 3;
}
2165
/* Load/store exclusive
 *
 *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
 * +-----+-------------+----+---+----+------+----+-------+------+------+
 * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
 * +-----+-------------+----+---+----+------+----+-------+------+------+
 *
 *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
 *   L: 0 -> store, 1 -> load
 *  o2: 0 -> exclusive, 1 -> not
 *  o1: 0 -> single register, 1 -> register pair
 *  o0: 1 -> load-acquire/store-release, 0 -> not
 *
 * This group also carries the compare-and-swap instructions (CAS*, CASP*),
 * which are accepted only when ARM_FEATURE_V8_ATOMICS is present.
 * Combinations not matched below fall through to unallocated_encoding().
 */
static void disas_ldst_excl(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    int rs = extract32(insn, 16, 5);
    int is_lasr = extract32(insn, 15, 1);
    /* 4-bit selector: o2:L:o1 from bits [23:21], with o0 as bit 0 */
    int o2_L_o1_o0 = extract32(insn, 21, 3) * 2 | is_lasr;
    int size = extract32(insn, 30, 2);
    TCGv_i64 tcg_addr;

    switch (o2_L_o1_o0) {
    case 0x0: /* STXR */
    case 0x1: /* STLXR */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        /* Store-release: the barrier is emitted ahead of the store. */
        if (is_lasr) {
            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
        }
        tcg_addr = read_cpu_reg_sp(s, rn, 1);
        /* Rs receives the exclusive-store status. */
        gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, false);
        return;

    case 0x4: /* LDXR */
    case 0x5: /* LDAXR */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        tcg_addr = read_cpu_reg_sp(s, rn, 1);
        s->is_ldex = true;
        gen_load_exclusive(s, rt, rt2, tcg_addr, size, false);
        /* Load-acquire: the barrier is emitted after the load. */
        if (is_lasr) {
            tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
        }
        return;

    case 0x9: /* STLR */
        /* Generate ISS for non-exclusive accesses including LASR.  */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
        tcg_addr = read_cpu_reg_sp(s, rn, 1);
        do_gpr_st(s, cpu_reg(s, rt), tcg_addr, size, true, rt,
                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
        return;

    case 0xd: /* LDAR */
        /* Generate ISS for non-exclusive accesses including LASR.  */
        if (rn == 31) {
            gen_check_sp_alignment(s);
        }
        tcg_addr = read_cpu_reg_sp(s, rn, 1);
        do_gpr_ld(s, cpu_reg(s, rt), tcg_addr, size, false, false, true, rt,
                  disas_ldst_compute_iss_sf(size, false, 0), is_lasr);
        tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
        return;

    case 0x2: case 0x3: /* CASP / STXP */
        /* sz<1> set selects the exclusive pair store; otherwise this is
         * the CASP encoding space.
         */
        if (size & 2) { /* STXP / STLXP */
            if (rn == 31) {
                gen_check_sp_alignment(s);
            }
            if (is_lasr) {
                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
            }
            tcg_addr = read_cpu_reg_sp(s, rn, 1);
            gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, true);
            return;
        }
        /* CASP requires Rt2 == 31 and even-numbered Rs and Rt. */
        if (rt2 == 31
            && ((rt | rs) & 1) == 0
            && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
            /* CASP / CASPL */
            /* size | 2 turns sz 0/1 into the 32-bit/64-bit pair sizes 2/3 */
            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
            return;
        }
        break;

    case 0x6: case 0x7: /* CASPA / LDXP */
        /* sz<1> set selects the exclusive pair load; otherwise this is
         * the CASPA encoding space.
         */
        if (size & 2) { /* LDXP / LDAXP */
            if (rn == 31) {
                gen_check_sp_alignment(s);
            }
            tcg_addr = read_cpu_reg_sp(s, rn, 1);
            s->is_ldex = true;
            gen_load_exclusive(s, rt, rt2, tcg_addr, size, true);
            if (is_lasr) {
                tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
            }
            return;
        }
        /* Same register constraints as CASP above. */
        if (rt2 == 31
            && ((rt | rs) & 1) == 0
            && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
            /* CASPA / CASPAL */
            gen_compare_and_swap_pair(s, rs, rt, rn, size | 2);
            return;
        }
        break;

    case 0xa: /* CAS */
    case 0xb: /* CASL */
    case 0xe: /* CASA */
    case 0xf: /* CASAL */
        if (rt2 == 31 && arm_dc_feature(s, ARM_FEATURE_V8_ATOMICS)) {
            gen_compare_and_swap(s, rs, rt, rn, size);
            return;
        }
        break;
    }
    /* Reached for unlisted selectors and for failed CAS/CASP checks. */
    unallocated_encoding(s);
}
2293
2294 /*
2295  * Load register (literal)
2296  *
2297  *  31 30 29   27  26 25 24 23                5 4     0
2298  * +-----+-------+---+-----+-------------------+-------+
2299  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2300  * +-----+-------+---+-----+-------------------+-------+
2301  *
2302  * V: 1 -> vector (simd/fp)
2303  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2304  *                   10-> 32 bit signed, 11 -> prefetch
2305  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2306  */
2307 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2308 {
2309     int rt = extract32(insn, 0, 5);
2310     int64_t imm = sextract32(insn, 5, 19) << 2;
2311     bool is_vector = extract32(insn, 26, 1);
2312     int opc = extract32(insn, 30, 2);
2313     bool is_signed = false;
2314     int size = 2;
2315     TCGv_i64 tcg_rt, tcg_addr;
2316
2317     if (is_vector) {
2318         if (opc == 3) {
2319             unallocated_encoding(s);
2320             return;
2321         }
2322         size = 2 + opc;
2323         if (!fp_access_check(s)) {
2324             return;
2325         }
2326     } else {
2327         if (opc == 3) {
2328             /* PRFM (literal) : prefetch */
2329             return;
2330         }
2331         size = 2 + extract32(opc, 0, 1);
2332         is_signed = extract32(opc, 1, 1);
2333     }
2334
2335     tcg_rt = cpu_reg(s, rt);
2336
2337     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2338     if (is_vector) {
2339         do_fp_ld(s, rt, tcg_addr, size);
2340     } else {
2341         /* Only unsigned 32bit loads target 32bit registers.  */
2342         bool iss_sf = opc != 0;
2343
2344         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2345                   true, rt, iss_sf, false);
2346     }
2347     tcg_temp_free_i64(tcg_addr);
2348 }
2349
/*
 * LDNP (Load Pair - non-temporal hint)
 * LDP (Load Pair - non vector)
 * LDPSW (Load Pair Signed Word - non vector)
 * STNP (Store Pair - non-temporal hint)
 * STP (Store Pair - non vector)
 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
 * LDP (Load Pair of SIMD&FP)
 * STNP (Store Pair of SIMD&FP - non-temporal hint)
 * STP (Store Pair of SIMD&FP)
 *
 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
 * +-----+-------+---+---+-------+---+-----------------------------+
 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
 * +-----+-------+---+---+-------+---+-------+-------+------+------+
 *
 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
 *      LDPSW                    01
 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
 *   V: 0 -> GPR, 1 -> Vector
 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
 *      10 -> signed offset, 11 -> pre-index
 *   L: 0 -> Store 1 -> Load
 *
 * Rt, Rt2 = GPR or SIMD registers to be stored
 * Rn = general purpose register containing address
 * imm7 = signed offset (multiple of 4 or 8 depending on size)
 */
static void disas_ldst_pair(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    uint64_t offset = sextract64(insn, 15, 7);
    int index = extract32(insn, 23, 2);
    bool is_vector = extract32(insn, 26, 1);
    bool is_load = extract32(insn, 22, 1);
    int opc = extract32(insn, 30, 2);

    bool is_signed = false;
    bool postindex = false;
    bool wback = false;

    TCGv_i64 tcg_addr; /* calculated address */
    int size; /* log2 of the per-register access size in bytes */

    if (opc == 3) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size = 2 + opc;
    } else {
        size = 2 + extract32(opc, 1, 1);
        is_signed = extract32(opc, 0, 1);
        /* The signed form (LDPSW) exists only as a load. */
        if (!is_load && is_signed) {
            unallocated_encoding(s);
            return;
        }
    }

    switch (index) {
    case 1: /* post-index */
        postindex = true;
        wback = true;
        break;
    case 0:
        /* signed offset with "non-temporal" hint. Since we don't emulate
         * caches we don't care about hints to the cache system about
         * data access patterns, and handle this identically to plain
         * signed offset.
         */
        if (is_signed) {
            /* There is no non-temporal-hint version of LDPSW */
            unallocated_encoding(s);
            return;
        }
        postindex = false;
        break;
    case 2: /* signed offset, rn not updated */
        postindex = false;
        break;
    case 3: /* pre-index */
        postindex = false;
        wback = true;
        break;
    }

    if (is_vector && !fp_access_check(s)) {
        return;
    }

    /* imm7 is scaled by the access size. */
    offset <<= size;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* Every mode except post-index applies the offset before the access. */
    if (!postindex) {
        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
    }

    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt, tcg_addr, size);
        } else {
            do_fp_st(s, rt, tcg_addr, size);
        }
        /* Step to the second element of the pair. */
        tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
        if (is_load) {
            do_fp_ld(s, rt2, tcg_addr, size);
        } else {
            do_fp_st(s, rt2, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);

        if (is_load) {
            TCGv_i64 tmp = tcg_temp_new_i64();

            /* Do not modify tcg_rt before recognizing any exception
             * from the second load.
             */
            do_gpr_ld(s, tmp, tcg_addr, size, is_signed, false,
                      false, 0, false, false);
            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
                      false, 0, false, false);

            tcg_gen_mov_i64(tcg_rt, tmp);
            tcg_temp_free_i64(tmp);
        } else {
            do_gpr_st(s, tcg_rt, tcg_addr, size,
                      false, 0, false, false);
            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
            do_gpr_st(s, tcg_rt2, tcg_addr, size,
                      false, 0, false, false);
        }
    }

    if (wback) {
        /* tcg_addr currently points at the second element, i.e. it is
         * (1 << size) past the address of the first access.  Undo that
         * step, and for post-index also apply the offset that was
         * skipped before the access.
         */
        if (postindex) {
            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
        } else {
            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
    }
}
2503
2504 /*
2505  * Load/store (immediate post-indexed)
2506  * Load/store (immediate pre-indexed)
2507  * Load/store (unscaled immediate)
2508  *
2509  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2510  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2511  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2512  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2513  *
2514  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2515          10 -> unprivileged
2516  * V = 0 -> non-vector
2517  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2518  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2519  */
2520 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2521                                 int opc,
2522                                 int size,
2523                                 int rt,
2524                                 bool is_vector)
2525 {
2526     int rn = extract32(insn, 5, 5);
2527     int imm9 = sextract32(insn, 12, 9);
2528     int idx = extract32(insn, 10, 2);
2529     bool is_signed = false;
2530     bool is_store = false;
2531     bool is_extended = false;
2532     bool is_unpriv = (idx == 2);
2533     bool iss_valid = !is_vector;
2534     bool post_index;
2535     bool writeback;
2536
2537     TCGv_i64 tcg_addr;
2538
2539     if (is_vector) {
2540         size |= (opc & 2) << 1;
2541         if (size > 4 || is_unpriv) {
2542             unallocated_encoding(s);
2543             return;
2544         }
2545         is_store = ((opc & 1) == 0);
2546         if (!fp_access_check(s)) {
2547             return;
2548         }
2549     } else {
2550         if (size == 3 && opc == 2) {
2551             /* PRFM - prefetch */
2552             if (is_unpriv) {
2553                 unallocated_encoding(s);
2554                 return;
2555             }
2556             return;
2557         }
2558         if (opc == 3 && size > 1) {
2559             unallocated_encoding(s);
2560             return;
2561         }
2562         is_store = (opc == 0);
2563         is_signed = extract32(opc, 1, 1);
2564         is_extended = (size < 3) && extract32(opc, 0, 1);
2565     }
2566
2567     switch (idx) {
2568     case 0:
2569     case 2:
2570         post_index = false;
2571         writeback = false;
2572         break;
2573     case 1:
2574         post_index = true;
2575         writeback = true;
2576         break;
2577     case 3:
2578         post_index = false;
2579         writeback = true;
2580         break;
2581     default:
2582         g_assert_not_reached();
2583     }
2584
2585     if (rn == 31) {
2586         gen_check_sp_alignment(s);
2587     }
2588     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2589
2590     if (!post_index) {
2591         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2592     }
2593
2594     if (is_vector) {
2595         if (is_store) {
2596             do_fp_st(s, rt, tcg_addr, size);
2597         } else {
2598             do_fp_ld(s, rt, tcg_addr, size);
2599         }
2600     } else {
2601         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2602         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2603         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2604
2605         if (is_store) {
2606             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2607                              iss_valid, rt, iss_sf, false);
2608         } else {
2609             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2610                              is_signed, is_extended, memidx,
2611                              iss_valid, rt, iss_sf, false);
2612         }
2613     }
2614
2615     if (writeback) {
2616         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2617         if (post_index) {
2618             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2619         }
2620         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2621     }
2622 }
2623
2624 /*
2625  * Load/store (register offset)
2626  *
2627  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2628  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2629  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2630  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2631  *
2632  * For non-vector:
2633  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2634  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2635  * For vector:
2636  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2637  *   opc<0>: 0 -> store, 1 -> load
2638  * V: 1 -> vector/simd
2639  * opt: extend encoding (see DecodeRegExtend)
2640  * S: if S=1 then scale (essentially index by sizeof(size))
2641  * Rt: register to transfer into/out of
2642  * Rn: address register or SP for base
2643  * Rm: offset register or ZR for offset
2644  */
2645 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2646                                    int opc,
2647                                    int size,
2648                                    int rt,
2649                                    bool is_vector)
2650 {
2651     int rn = extract32(insn, 5, 5);
2652     int shift = extract32(insn, 12, 1);
2653     int rm = extract32(insn, 16, 5);
2654     int opt = extract32(insn, 13, 3);
2655     bool is_signed = false;
2656     bool is_store = false;
2657     bool is_extended = false;
2658
2659     TCGv_i64 tcg_rm;
2660     TCGv_i64 tcg_addr;
2661
2662     if (extract32(opt, 1, 1) == 0) {
2663         unallocated_encoding(s);
2664         return;
2665     }
2666
2667     if (is_vector) {
2668         size |= (opc & 2) << 1;
2669         if (size > 4) {
2670             unallocated_encoding(s);
2671             return;
2672         }
2673         is_store = !extract32(opc, 0, 1);
2674         if (!fp_access_check(s)) {
2675             return;
2676         }
2677     } else {
2678         if (size == 3 && opc == 2) {
2679             /* PRFM - prefetch */
2680             return;
2681         }
2682         if (opc == 3 && size > 1) {
2683             unallocated_encoding(s);
2684             return;
2685         }
2686         is_store = (opc == 0);
2687         is_signed = extract32(opc, 1, 1);
2688         is_extended = (size < 3) && extract32(opc, 0, 1);
2689     }
2690
2691     if (rn == 31) {
2692         gen_check_sp_alignment(s);
2693     }
2694     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2695
2696     tcg_rm = read_cpu_reg(s, rm, 1);
2697     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2698
2699     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2700
2701     if (is_vector) {
2702         if (is_store) {
2703             do_fp_st(s, rt, tcg_addr, size);
2704         } else {
2705             do_fp_ld(s, rt, tcg_addr, size);
2706         }
2707     } else {
2708         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2709         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2710         if (is_store) {
2711             do_gpr_st(s, tcg_rt, tcg_addr, size,
2712                       true, rt, iss_sf, false);
2713         } else {
2714             do_gpr_ld(s, tcg_rt, tcg_addr, size,
2715                       is_signed, is_extended,
2716                       true, rt, iss_sf, false);
2717         }
2718     }
2719 }
2720
2721 /*
2722  * Load/store (unsigned immediate)
2723  *
2724  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2725  * +----+-------+---+-----+-----+------------+-------+------+
2726  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2727  * +----+-------+---+-----+-----+------------+-------+------+
2728  *
2729  * For non-vector:
2730  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2731  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2732  * For vector:
2733  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2734  *   opc<0>: 0 -> store, 1 -> load
2735  * Rn: base address register (inc SP)
2736  * Rt: target register
2737  */
2738 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2739                                         int opc,
2740                                         int size,
2741                                         int rt,
2742                                         bool is_vector)
2743 {
2744     int rn = extract32(insn, 5, 5);
2745     unsigned int imm12 = extract32(insn, 10, 12);
2746     unsigned int offset;
2747
2748     TCGv_i64 tcg_addr;
2749
2750     bool is_store;
2751     bool is_signed = false;
2752     bool is_extended = false;
2753
2754     if (is_vector) {
2755         size |= (opc & 2) << 1;
2756         if (size > 4) {
2757             unallocated_encoding(s);
2758             return;
2759         }
2760         is_store = !extract32(opc, 0, 1);
2761         if (!fp_access_check(s)) {
2762             return;
2763         }
2764     } else {
2765         if (size == 3 && opc == 2) {
2766             /* PRFM - prefetch */
2767             return;
2768         }
2769         if (opc == 3 && size > 1) {
2770             unallocated_encoding(s);
2771             return;
2772         }
2773         is_store = (opc == 0);
2774         is_signed = extract32(opc, 1, 1);
2775         is_extended = (size < 3) && extract32(opc, 0, 1);
2776     }
2777
2778     if (rn == 31) {
2779         gen_check_sp_alignment(s);
2780     }
2781     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2782     offset = imm12 << size;
2783     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2784
2785     if (is_vector) {
2786         if (is_store) {
2787             do_fp_st(s, rt, tcg_addr, size);
2788         } else {
2789             do_fp_ld(s, rt, tcg_addr, size);
2790         }
2791     } else {
2792         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2793         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2794         if (is_store) {
2795             do_gpr_st(s, tcg_rt, tcg_addr, size,
2796                       true, rt, iss_sf, false);
2797         } else {
2798             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2799                       true, rt, iss_sf, false);
2800         }
2801     }
2802 }
2803
2804 /* Atomic memory operations
2805  *
2806  *  31  30      27  26    24    22  21   16   15    12    10    5     0
2807  * +------+-------+---+-----+-----+---+----+----+-----+-----+----+-----+
2808  * | size | 1 1 1 | V | 0 0 | A R | 1 | Rs | o3 | opc | 0 0 | Rn |  Rt |
2809  * +------+-------+---+-----+-----+--------+----+-----+-----+----+-----+
2810  *
2811  * Rt: the result register
2812  * Rn: base address or SP
2813  * Rs: the source register for the operation
2814  * V: vector flag (always 0 as of v8.3)
2815  * A: acquire flag
2816  * R: release flag
2817  */
2818 static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
2819                               int size, int rt, bool is_vector)
2820 {
2821     int rs = extract32(insn, 16, 5);
2822     int rn = extract32(insn, 5, 5);
2823     int o3_opc = extract32(insn, 12, 4);
2824     int feature = ARM_FEATURE_V8_ATOMICS;
2825     TCGv_i64 tcg_rn, tcg_rs;
2826     AtomicThreeOpFn *fn;
2827
2828     if (is_vector) {
2829         unallocated_encoding(s);
2830         return;
2831     }
2832     switch (o3_opc) {
2833     case 000: /* LDADD */
2834         fn = tcg_gen_atomic_fetch_add_i64;
2835         break;
2836     case 001: /* LDCLR */
2837         fn = tcg_gen_atomic_fetch_and_i64;
2838         break;
2839     case 002: /* LDEOR */
2840         fn = tcg_gen_atomic_fetch_xor_i64;
2841         break;
2842     case 003: /* LDSET */
2843         fn = tcg_gen_atomic_fetch_or_i64;
2844         break;
2845     case 004: /* LDSMAX */
2846         fn = tcg_gen_atomic_fetch_smax_i64;
2847         break;
2848     case 005: /* LDSMIN */
2849         fn = tcg_gen_atomic_fetch_smin_i64;
2850         break;
2851     case 006: /* LDUMAX */
2852         fn = tcg_gen_atomic_fetch_umax_i64;
2853         break;
2854     case 007: /* LDUMIN */
2855         fn = tcg_gen_atomic_fetch_umin_i64;
2856         break;
2857     case 010: /* SWP */
2858         fn = tcg_gen_atomic_xchg_i64;
2859         break;
2860     default:
2861         unallocated_encoding(s);
2862         return;
2863     }
2864     if (!arm_dc_feature(s, feature)) {
2865         unallocated_encoding(s);
2866         return;
2867     }
2868
2869     if (rn == 31) {
2870         gen_check_sp_alignment(s);
2871     }
2872     tcg_rn = cpu_reg_sp(s, rn);
2873     tcg_rs = read_cpu_reg(s, rs, true);
2874
2875     if (o3_opc == 1) { /* LDCLR */
2876         tcg_gen_not_i64(tcg_rs, tcg_rs);
2877     }
2878
2879     /* The tcg atomic primitives are all full barriers.  Therefore we
2880      * can ignore the Acquire and Release bits of this instruction.
2881      */
2882     fn(cpu_reg(s, rt), tcg_rn, tcg_rs, get_mem_index(s),
2883        s->be_data | size | MO_ALIGN);
2884 }
2885
2886 /* Load/store register (all forms) */
2887 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2888 {
2889     int rt = extract32(insn, 0, 5);
2890     int opc = extract32(insn, 22, 2);
2891     bool is_vector = extract32(insn, 26, 1);
2892     int size = extract32(insn, 30, 2);
2893
2894     switch (extract32(insn, 24, 2)) {
2895     case 0:
2896         if (extract32(insn, 21, 1) == 0) {
2897             /* Load/store register (unscaled immediate)
2898              * Load/store immediate pre/post-indexed
2899              * Load/store register unprivileged
2900              */
2901             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2902             return;
2903         }
2904         switch (extract32(insn, 10, 2)) {
2905         case 0:
2906             disas_ldst_atomic(s, insn, size, rt, is_vector);
2907             return;
2908         case 2:
2909             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2910             return;
2911         }
2912         break;
2913     case 1:
2914         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2915         return;
2916     }
2917     unallocated_encoding(s);
2918 }
2919
2920 /* AdvSIMD load/store multiple structures
2921  *
2922  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2923  * +---+---+---------------+---+-------------+--------+------+------+------+
2924  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2925  * +---+---+---------------+---+-------------+--------+------+------+------+
2926  *
2927  * AdvSIMD load/store multiple structures (post-indexed)
2928  *
2929  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2930  * +---+---+---------------+---+---+---------+--------+------+------+------+
2931  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2932  * +---+---+---------------+---+---+---------+--------+------+------+------+
2933  *
2934  * Rt: first (or only) SIMD&FP register to be transferred
2935  * Rn: base address or SP
2936  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2937  */
2938 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2939 {
2940     int rt = extract32(insn, 0, 5);
2941     int rn = extract32(insn, 5, 5);
2942     int size = extract32(insn, 10, 2);
2943     int opcode = extract32(insn, 12, 4);
2944     bool is_store = !extract32(insn, 22, 1);
2945     bool is_postidx = extract32(insn, 23, 1);
2946     bool is_q = extract32(insn, 30, 1);
2947     TCGv_i64 tcg_addr, tcg_rn;
2948
2949     int ebytes = 1 << size;
2950     int elements = (is_q ? 128 : 64) / (8 << size);
2951     int rpt;    /* num iterations */
2952     int selem;  /* structure elements */
2953     int r;
2954
2955     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2956         unallocated_encoding(s);
2957         return;
2958     }
2959
2960     /* From the shared decode logic */
2961     switch (opcode) {
2962     case 0x0:
2963         rpt = 1;
2964         selem = 4;
2965         break;
2966     case 0x2:
2967         rpt = 4;
2968         selem = 1;
2969         break;
2970     case 0x4:
2971         rpt = 1;
2972         selem = 3;
2973         break;
2974     case 0x6:
2975         rpt = 3;
2976         selem = 1;
2977         break;
2978     case 0x7:
2979         rpt = 1;
2980         selem = 1;
2981         break;
2982     case 0x8:
2983         rpt = 1;
2984         selem = 2;
2985         break;
2986     case 0xa:
2987         rpt = 2;
2988         selem = 1;
2989         break;
2990     default:
2991         unallocated_encoding(s);
2992         return;
2993     }
2994
2995     if (size == 3 && !is_q && selem != 1) {
2996         /* reserved */
2997         unallocated_encoding(s);
2998         return;
2999     }
3000
3001     if (!fp_access_check(s)) {
3002         return;
3003     }
3004
3005     if (rn == 31) {
3006         gen_check_sp_alignment(s);
3007     }
3008
3009     tcg_rn = cpu_reg_sp(s, rn);
3010     tcg_addr = tcg_temp_new_i64();
3011     tcg_gen_mov_i64(tcg_addr, tcg_rn);
3012
3013     for (r = 0; r < rpt; r++) {
3014         int e;
3015         for (e = 0; e < elements; e++) {
3016             int tt = (rt + r) % 32;
3017             int xs;
3018             for (xs = 0; xs < selem; xs++) {
3019                 if (is_store) {
3020                     do_vec_st(s, tt, e, tcg_addr, size);
3021                 } else {
3022                     do_vec_ld(s, tt, e, tcg_addr, size);
3023
3024                     /* For non-quad operations, setting a slice of the low
3025                      * 64 bits of the register clears the high 64 bits (in
3026                      * the ARM ARM pseudocode this is implicit in the fact
3027                      * that 'rval' is a 64 bit wide variable).
3028                      * For quad operations, we might still need to zero the
3029                      * high bits of SVE.  We optimize by noticing that we only
3030                      * need to do this the first time we touch a register.
3031                      */
3032                     if (e == 0 && (r == 0 || xs == selem - 1)) {
3033                         clear_vec_high(s, is_q, tt);
3034                     }
3035                 }
3036                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
3037                 tt = (tt + 1) % 32;
3038             }
3039         }
3040     }
3041
3042     if (is_postidx) {
3043         int rm = extract32(insn, 16, 5);
3044         if (rm == 31) {
3045             tcg_gen_mov_i64(tcg_rn, tcg_addr);
3046         } else {
3047             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3048         }
3049     }
3050     tcg_temp_free_i64(tcg_addr);
3051 }
3052
3053 /* AdvSIMD load/store single structure
3054  *
3055  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3056  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3057  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
3058  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3059  *
3060  * AdvSIMD load/store single structure (post-indexed)
3061  *
3062  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
3063  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3064  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
3065  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
3066  *
3067  * Rt: first (or only) SIMD&FP register to be transferred
3068  * Rn: base address or SP
3069  * Rm (post-index only): post-index register (when !31) or size dependent #imm
3070  * index = encoded in Q:S:size dependent on size
3071  *
3072  * lane_size = encoded in R, opc
3073  * transfer width = encoded in opc, S, size
3074  */
3075 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
3076 {
3077     int rt = extract32(insn, 0, 5);
3078     int rn = extract32(insn, 5, 5);
3079     int size = extract32(insn, 10, 2);
3080     int S = extract32(insn, 12, 1);
3081     int opc = extract32(insn, 13, 3);
3082     int R = extract32(insn, 21, 1);
3083     int is_load = extract32(insn, 22, 1);
3084     int is_postidx = extract32(insn, 23, 1);
3085     int is_q = extract32(insn, 30, 1);
3086
3087     int scale = extract32(opc, 1, 2);
3088     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
3089     bool replicate = false;
3090     int index = is_q << 3 | S << 2 | size;
3091     int ebytes, xs;
3092     TCGv_i64 tcg_addr, tcg_rn;
3093
3094     switch (scale) {
3095     case 3:
3096         if (!is_load || S) {
3097             unallocated_encoding(s);
3098             return;
3099         }
3100         scale = size;
3101         replicate = true;
3102         break;
3103     case 0:
3104         break;
3105     case 1:
3106         if (extract32(size, 0, 1)) {
3107             unallocated_encoding(s);
3108             return;
3109         }
3110         index >>= 1;
3111         break;
3112     case 2:
3113         if (extract32(size, 1, 1)) {
3114             unallocated_encoding(s);
3115             return;
3116         }
3117         if (!extract32(size, 0, 1)) {
3118             index >>= 2;
3119         } else {
3120             if (S) {
3121                 unallocated_encoding(s);
3122                 return;
3123             }
3124             index >>= 3;
3125             scale = 3;
3126         }
3127         break;
3128     default:
3129         g_assert_not_reached();
3130     }
3131
3132     if (!fp_access_check(s)) {
3133         return;
3134     }
3135
3136     ebytes = 1 << scale;
3137
3138     if (rn == 31) {
3139         gen_check_sp_alignment(s);
3140     }
3141
3142     tcg_rn = cpu_reg_sp(s, rn);
3143     tcg_addr = tcg_temp_new_i64();
3144     tcg_gen_mov_i64(tcg_addr, tcg_rn);
3145
3146     for (xs = 0; xs < selem; xs++) {
3147         if (replicate) {
3148             /* Load and replicate to all elements */
3149             uint64_t mulconst;
3150             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
3151
3152             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
3153                                 get_mem_index(s), s->be_data + scale);
3154             switch (scale) {
3155             case 0:
3156                 mulconst = 0x0101010101010101ULL;
3157                 break;
3158             case 1:
3159                 mulconst = 0x0001000100010001ULL;
3160                 break;
3161             case 2:
3162                 mulconst = 0x0000000100000001ULL;
3163                 break;
3164             case 3:
3165                 mulconst = 0;
3166                 break;
3167             default:
3168                 g_assert_not_reached();
3169             }
3170             if (mulconst) {
3171                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
3172             }
3173             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
3174             if (is_q) {
3175                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
3176             }
3177             tcg_temp_free_i64(tcg_tmp);
3178             clear_vec_high(s, is_q, rt);
3179         } else {
3180             /* Load/store one element per register */
3181             if (is_load) {
3182                 do_vec_ld(s, rt, index, tcg_addr, scale);
3183             } else {
3184                 do_vec_st(s, rt, index, tcg_addr, scale);
3185             }
3186         }
3187         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
3188         rt = (rt + 1) % 32;
3189     }
3190
3191     if (is_postidx) {
3192         int rm = extract32(insn, 16, 5);
3193         if (rm == 31) {
3194             tcg_gen_mov_i64(tcg_rn, tcg_addr);
3195         } else {
3196             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3197         }
3198     }
3199     tcg_temp_free_i64(tcg_addr);
3200 }
3201
3202 /* Loads and stores */
3203 static void disas_ldst(DisasContext *s, uint32_t insn)
3204 {
3205     switch (extract32(insn, 24, 6)) {
3206     case 0x08: /* Load/store exclusive */
3207         disas_ldst_excl(s, insn);
3208         break;
3209     case 0x18: case 0x1c: /* Load register (literal) */
3210         disas_ld_lit(s, insn);
3211         break;
3212     case 0x28: case 0x29:
3213     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3214         disas_ldst_pair(s, insn);
3215         break;
3216     case 0x38: case 0x39:
3217     case 0x3c: case 0x3d: /* Load/store register (all forms) */
3218         disas_ldst_reg(s, insn);
3219         break;
3220     case 0x0c: /* AdvSIMD load/store multiple structures */
3221         disas_ldst_multiple_struct(s, insn);
3222         break;
3223     case 0x0d: /* AdvSIMD load/store single structure */
3224         disas_ldst_single_struct(s, insn);
3225         break;
3226     default:
3227         unallocated_encoding(s);
3228         break;
3229     }
3230 }
3231
3232 /* PC-rel. addressing
3233  *   31  30   29 28       24 23                5 4    0
3234  * +----+-------+-----------+-------------------+------+
3235  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
3236  * +----+-------+-----------+-------------------+------+
3237  */
3238 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3239 {
3240     unsigned int page, rd;
3241     uint64_t base;
3242     uint64_t offset;
3243
3244     page = extract32(insn, 31, 1);
3245     /* SignExtend(immhi:immlo) -> offset */
3246     offset = sextract64(insn, 5, 19);
3247     offset = offset << 2 | extract32(insn, 29, 2);
3248     rd = extract32(insn, 0, 5);
3249     base = s->pc - 4;
3250
3251     if (page) {
3252         /* ADRP (page based) */
3253         base &= ~0xfff;
3254         offset <<= 12;
3255     }
3256
3257     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3258 }
3259
3260 /*
3261  * Add/subtract (immediate)
3262  *
3263  *  31 30 29 28       24 23 22 21         10 9   5 4   0
3264  * +--+--+--+-----------+-----+-------------+-----+-----+
3265  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
3266  * +--+--+--+-----------+-----+-------------+-----+-----+
3267  *
3268  *    sf: 0 -> 32bit, 1 -> 64bit
3269  *    op: 0 -> add  , 1 -> sub
3270  *     S: 1 -> set flags
3271  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3272  */
3273 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3274 {
3275     int rd = extract32(insn, 0, 5);
3276     int rn = extract32(insn, 5, 5);
3277     uint64_t imm = extract32(insn, 10, 12);
3278     int shift = extract32(insn, 22, 2);
3279     bool setflags = extract32(insn, 29, 1);
3280     bool sub_op = extract32(insn, 30, 1);
3281     bool is_64bit = extract32(insn, 31, 1);
3282
3283     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3284     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3285     TCGv_i64 tcg_result;
3286
3287     switch (shift) {
3288     case 0x0:
3289         break;
3290     case 0x1:
3291         imm <<= 12;
3292         break;
3293     default:
3294         unallocated_encoding(s);
3295         return;
3296     }
3297
3298     tcg_result = tcg_temp_new_i64();
3299     if (!setflags) {
3300         if (sub_op) {
3301             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3302         } else {
3303             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3304         }
3305     } else {
3306         TCGv_i64 tcg_imm = tcg_const_i64(imm);
3307         if (sub_op) {
3308             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3309         } else {
3310             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3311         }
3312         tcg_temp_free_i64(tcg_imm);
3313     }
3314
3315     if (is_64bit) {
3316         tcg_gen_mov_i64(tcg_rd, tcg_result);
3317     } else {
3318         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3319     }
3320
3321     tcg_temp_free_i64(tcg_result);
3322 }
3323
/* Tile a pattern across a 64 bit value.
 *
 * @mask holds the pattern in its bottom @e bits (higher bits are
 * expected to be zero) and @e must be non-zero.  The pattern is
 * OR-ed into itself at a doubling distance until the distance
 * reaches 64, and the accumulated value is returned.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int span;

    assert(e != 0);
    for (span = e; span < 64; span *= 2) {
        mask |= mask << span;
    }
    return mask;
}
3337
/* Return a value with the bottom @length bits set; @length must be
 * in the range 1..64 inclusive.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    const uint64_t all_ones = ~0ULL;

    assert(length > 0 && length <= 64);
    /* Shift out the bits that are not wanted from the top. */
    return all_ones >> (64 - length);
}
3344
3345 /* Simplified variant of pseudocode DecodeBitMasks() for the case where we
3346  * only require the wmask. Returns false if the imms/immr/immn are a reserved
3347  * value (ie should cause a guest UNDEF exception), and true if they are
3348  * valid, in which case the decoded bit pattern is written to result.
3349  */
bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                            unsigned int imms, unsigned int immr)
{
    uint64_t pattern;
    unsigned int esize, emask, run, rot;
    int log2_esize;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The decoded value is a 64 bit vector of identical elements, each
     * 2, 4, 8, 16, 32 or 64 bits wide.  Every element holds a run of
     * between 1 and esize-1 set bits, rotated within the element by
     * between 0 and esize-1 bits.
     *
     * immn (1 bit) and imms (6 bits) give element size and run length:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * The one combination left over, immn = 0 with imms = 11111x, is
     * reserved, and so is a <length of run - 1> field of all ones.
     *
     * The rotation is always immr % esize (immr being 6 bits wide).
     */

    /* Element size: position of the top set bit of immn:NOT(imms) */
    log2_esize = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (log2_esize < 1) {
        /* immn == 0 with imms == 0b11111x: reserved */
        return false;
    }
    esize = 1 << log2_esize;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;
    if (run == emask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* Build a single element: run+1 set bits rotated right by rot
     * inside an esize-bit field...
     */
    pattern = bitmask64(run + 1);
    if (rot != 0) {
        pattern = (pattern >> rot) | (pattern << (esize - rot));
        pattern &= bitmask64(esize);
    }

    /* ...and tile that element across all 64 bits. */
    *result = bitfield_replicate(pattern, esize);
    return true;
}
3410
3411 /* Logical (immediate)
3412  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3413  * +----+-----+-------------+---+------+------+------+------+
3414  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3415  * +----+-----+-------------+---+------+------+------+------+
3416  */
3417 static void disas_logic_imm(DisasContext *s, uint32_t insn)
3418 {
3419     unsigned int sf, opc, is_n, immr, imms, rn, rd;
3420     TCGv_i64 tcg_rd, tcg_rn;
3421     uint64_t wmask;
3422     bool is_and = false;
3423
3424     sf = extract32(insn, 31, 1);
3425     opc = extract32(insn, 29, 2);
3426     is_n = extract32(insn, 22, 1);
3427     immr = extract32(insn, 16, 6);
3428     imms = extract32(insn, 10, 6);
3429     rn = extract32(insn, 5, 5);
3430     rd = extract32(insn, 0, 5);
3431
3432     if (!sf && is_n) {
3433         unallocated_encoding(s);
3434         return;
3435     }
3436
3437     if (opc == 0x3) { /* ANDS */
3438         tcg_rd = cpu_reg(s, rd);
3439     } else {
3440         tcg_rd = cpu_reg_sp(s, rd);
3441     }
3442     tcg_rn = cpu_reg(s, rn);
3443
3444     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3445         /* some immediate field values are reserved */
3446         unallocated_encoding(s);
3447         return;
3448     }
3449
3450     if (!sf) {
3451         wmask &= 0xffffffff;
3452     }
3453
3454     switch (opc) {
3455     case 0x3: /* ANDS */
3456     case 0x0: /* AND */
3457         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3458         is_and = true;
3459         break;
3460     case 0x1: /* ORR */
3461         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3462         break;
3463     case 0x2: /* EOR */
3464         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3465         break;
3466     default:
3467         assert(FALSE); /* must handle all above */
3468         break;
3469     }
3470
3471     if (!sf && !is_and) {
3472         /* zero extend final result; we know we can skip this for AND
3473          * since the immediate had the high 32 bits clear.
3474          */
3475         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3476     }
3477
3478     if (opc == 3) { /* ANDS */
3479         gen_logic_CC(sf, tcg_rd);
3480     }
3481 }
3482
3483 /*
3484  * Move wide (immediate)
3485  *
3486  *  31 30 29 28         23 22 21 20             5 4    0
3487  * +--+-----+-------------+-----+----------------+------+
3488  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3489  * +--+-----+-------------+-----+----------------+------+
3490  *
3491  * sf: 0 -> 32 bit, 1 -> 64 bit
3492  * opc: 00 -> N, 10 -> Z, 11 -> K
3493  * hw: shift/16 (0,16, and sf only 32, 48)
3494  */
3495 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3496 {
3497     int rd = extract32(insn, 0, 5);
3498     uint64_t imm = extract32(insn, 5, 16);
3499     int sf = extract32(insn, 31, 1);
3500     int opc = extract32(insn, 29, 2);
3501     int pos = extract32(insn, 21, 2) << 4;
3502     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3503     TCGv_i64 tcg_imm;
3504
3505     if (!sf && (pos >= 32)) {
3506         unallocated_encoding(s);
3507         return;
3508     }
3509
3510     switch (opc) {
3511     case 0: /* MOVN */
3512     case 2: /* MOVZ */
3513         imm <<= pos;
3514         if (opc == 0) {
3515             imm = ~imm;
3516         }
3517         if (!sf) {
3518             imm &= 0xffffffffu;
3519         }
3520         tcg_gen_movi_i64(tcg_rd, imm);
3521         break;
3522     case 3: /* MOVK */
3523         tcg_imm = tcg_const_i64(imm);
3524         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3525         tcg_temp_free_i64(tcg_imm);
3526         if (!sf) {
3527             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3528         }
3529         break;
3530     default:
3531         unallocated_encoding(s);
3532         break;
3533     }
3534 }
3535
3536 /* Bitfield
3537  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3538  * +----+-----+-------------+---+------+------+------+------+
3539  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3540  * +----+-----+-------------+---+------+------+------+------+
3541  */
3542 static void disas_bitfield(DisasContext *s, uint32_t insn)
3543 {
3544     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3545     TCGv_i64 tcg_rd, tcg_tmp;
3546
3547     sf = extract32(insn, 31, 1);
3548     opc = extract32(insn, 29, 2);
3549     n = extract32(insn, 22, 1);
3550     ri = extract32(insn, 16, 6);
3551     si = extract32(insn, 10, 6);
3552     rn = extract32(insn, 5, 5);
3553     rd = extract32(insn, 0, 5);
3554     bitsize = sf ? 64 : 32;
3555
3556     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3557         unallocated_encoding(s);
3558         return;
3559     }
3560
3561     tcg_rd = cpu_reg(s, rd);
3562
3563     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3564        to be smaller than bitsize, we'll never reference data outside the
3565        low 32-bits anyway.  */
3566     tcg_tmp = read_cpu_reg(s, rn, 1);
3567
3568     /* Recognize simple(r) extractions.  */
3569     if (si >= ri) {
3570         /* Wd<s-r:0> = Wn<s:r> */
3571         len = (si - ri) + 1;
3572         if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3573             tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3574             goto done;
3575         } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3576             tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3577             return;
3578         }
3579         /* opc == 1, BXFIL fall through to deposit */
3580         tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len);
3581         pos = 0;
3582     } else {
3583         /* Handle the ri > si case with a deposit
3584          * Wd<32+s-r,32-r> = Wn<s:0>
3585          */
3586         len = si + 1;
3587         pos = (bitsize - ri) & (bitsize - 1);
3588     }
3589
3590     if (opc == 0 && len < ri) {
3591         /* SBFM: sign extend the destination field from len to fill
3592            the balance of the word.  Let the deposit below insert all
3593            of those sign bits.  */
3594         tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3595         len = ri;
3596     }
3597
3598     if (opc == 1) { /* BFM, BXFIL */
3599         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3600     } else {
3601         /* SBFM or UBFM: We start with zero, and we haven't modified
3602            any bits outside bitsize, therefore the zero-extension
3603            below is unneeded.  */
3604         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3605         return;
3606     }
3607
3608  done:
3609     if (!sf) { /* zero extend final result */
3610         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3611     }
3612 }
3613
3614 /* Extract
3615  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3616  * +----+------+-------------+---+----+------+--------+------+------+
3617  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3618  * +----+------+-------------+---+----+------+--------+------+------+
3619  */
3620 static void disas_extract(DisasContext *s, uint32_t insn)
3621 {
3622     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3623
3624     sf = extract32(insn, 31, 1);
3625     n = extract32(insn, 22, 1);
3626     rm = extract32(insn, 16, 5);
3627     imm = extract32(insn, 10, 6);
3628     rn = extract32(insn, 5, 5);
3629     rd = extract32(insn, 0, 5);
3630     op21 = extract32(insn, 29, 2);
3631     op0 = extract32(insn, 21, 1);
3632     bitsize = sf ? 64 : 32;
3633
3634     if (sf != n || op21 || op0 || imm >= bitsize) {
3635         unallocated_encoding(s);
3636     } else {
3637         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3638
3639         tcg_rd = cpu_reg(s, rd);
3640
3641         if (unlikely(imm == 0)) {
3642             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3643              * so an extract from bit 0 is a special case.
3644              */
3645             if (sf) {
3646                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3647             } else {
3648                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3649             }
3650         } else if (rm == rn) { /* ROR */
3651             tcg_rm = cpu_reg(s, rm);
3652             if (sf) {
3653                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3654             } else {
3655                 TCGv_i32 tmp = tcg_temp_new_i32();
3656                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3657                 tcg_gen_rotri_i32(tmp, tmp, imm);
3658                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3659                 tcg_temp_free_i32(tmp);
3660             }
3661         } else {
3662             tcg_rm = read_cpu_reg(s, rm, sf);
3663             tcg_rn = read_cpu_reg(s, rn, sf);
3664             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3665             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3666             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3667             if (!sf) {
3668                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3669             }
3670         }
3671     }
3672 }
3673
3674 /* Data processing - immediate */
3675 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3676 {
3677     switch (extract32(insn, 23, 6)) {
3678     case 0x20: case 0x21: /* PC-rel. addressing */
3679         disas_pc_rel_adr(s, insn);
3680         break;
3681     case 0x22: case 0x23: /* Add/subtract (immediate) */
3682         disas_add_sub_imm(s, insn);
3683         break;
3684     case 0x24: /* Logical (immediate) */
3685         disas_logic_imm(s, insn);
3686         break;
3687     case 0x25: /* Move wide (immediate) */
3688         disas_movw_imm(s, insn);
3689         break;
3690     case 0x26: /* Bitfield */
3691         disas_bitfield(s, insn);
3692         break;
3693     case 0x27: /* Extract */
3694         disas_extract(s, insn);
3695         break;
3696     default:
3697         unallocated_encoding(s);
3698         break;
3699     }
3700 }
3701
3702 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3703  * Note that it is the caller's responsibility to ensure that the
3704  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3705  * mandated semantics for out of range shifts.
3706  */
3707 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3708                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3709 {
3710     switch (shift_type) {
3711     case A64_SHIFT_TYPE_LSL:
3712         tcg_gen_shl_i64(dst, src, shift_amount);
3713         break;
3714     case A64_SHIFT_TYPE_LSR:
3715         tcg_gen_shr_i64(dst, src, shift_amount);
3716         break;
3717     case A64_SHIFT_TYPE_ASR:
3718         if (!sf) {
3719             tcg_gen_ext32s_i64(dst, src);
3720         }
3721         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3722         break;
3723     case A64_SHIFT_TYPE_ROR:
3724         if (sf) {
3725             tcg_gen_rotr_i64(dst, src, shift_amount);
3726         } else {
3727             TCGv_i32 t0, t1;
3728             t0 = tcg_temp_new_i32();
3729             t1 = tcg_temp_new_i32();
3730             tcg_gen_extrl_i64_i32(t0, src);
3731             tcg_gen_extrl_i64_i32(t1, shift_amount);
3732             tcg_gen_rotr_i32(t0, t0, t1);
3733             tcg_gen_extu_i32_i64(dst, t0);
3734             tcg_temp_free_i32(t0);
3735             tcg_temp_free_i32(t1);
3736         }
3737         break;
3738     default:
3739         assert(FALSE); /* all shift types should be handled */
3740         break;
3741     }
3742
3743     if (!sf) { /* zero extend final result */
3744         tcg_gen_ext32u_i64(dst, dst);
3745     }
3746 }
3747
3748 /* Shift a TCGv src by immediate, put result in dst.
3749  * The shift amount must be in range (this should always be true as the
3750  * relevant instructions will UNDEF on bad shift immediates).
3751  */
3752 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3753                           enum a64_shift_type shift_type, unsigned int shift_i)
3754 {
3755     assert(shift_i < (sf ? 64 : 32));
3756
3757     if (shift_i == 0) {
3758         tcg_gen_mov_i64(dst, src);
3759     } else {
3760         TCGv_i64 shift_const;
3761
3762         shift_const = tcg_const_i64(shift_i);
3763         shift_reg(dst, src, sf, shift_type, shift_const);
3764         tcg_temp_free_i64(shift_const);
3765     }
3766 }
3767
3768 /* Logical (shifted register)
3769  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3770  * +----+-----+-----------+-------+---+------+--------+------+------+
3771  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3772  * +----+-----+-----------+-------+---+------+--------+------+------+
3773  */
3774 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3775 {
3776     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3777     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3778
3779     sf = extract32(insn, 31, 1);
3780     opc = extract32(insn, 29, 2);
3781     shift_type = extract32(insn, 22, 2);
3782     invert = extract32(insn, 21, 1);
3783     rm = extract32(insn, 16, 5);
3784     shift_amount = extract32(insn, 10, 6);
3785     rn = extract32(insn, 5, 5);
3786     rd = extract32(insn, 0, 5);
3787
3788     if (!sf && (shift_amount & (1 << 5))) {
3789         unallocated_encoding(s);
3790         return;
3791     }
3792
3793     tcg_rd = cpu_reg(s, rd);
3794
3795     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3796         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3797          * register-register MOV and MVN, so it is worth special casing.
3798          */
3799         tcg_rm = cpu_reg(s, rm);
3800         if (invert) {
3801             tcg_gen_not_i64(tcg_rd, tcg_rm);
3802             if (!sf) {
3803                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3804             }
3805         } else {
3806             if (sf) {
3807                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3808             } else {
3809                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3810             }
3811         }
3812         return;
3813     }
3814
3815     tcg_rm = read_cpu_reg(s, rm, sf);
3816
3817     if (shift_amount) {
3818         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3819     }
3820
3821     tcg_rn = cpu_reg(s, rn);
3822
3823     switch (opc | (invert << 2)) {
3824     case 0: /* AND */
3825     case 3: /* ANDS */
3826         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3827         break;
3828     case 1: /* ORR */
3829         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3830         break;
3831     case 2: /* EOR */
3832         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3833         break;
3834     case 4: /* BIC */
3835     case 7: /* BICS */
3836         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3837         break;
3838     case 5: /* ORN */
3839         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3840         break;
3841     case 6: /* EON */
3842         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3843         break;
3844     default:
3845         assert(FALSE);
3846         break;
3847     }
3848
3849     if (!sf) {
3850         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3851     }
3852
3853     if (opc == 3) {
3854         gen_logic_CC(sf, tcg_rd);
3855     }
3856 }
3857
3858 /*
3859  * Add/subtract (extended register)
3860  *
3861  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3862  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3863  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3864  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3865  *
3866  *  sf: 0 -> 32bit, 1 -> 64bit
3867  *  op: 0 -> add  , 1 -> sub
3868  *   S: 1 -> set flags
3869  * opt: 00
3870  * option: extension type (see DecodeRegExtend)
3871  * imm3: optional shift to Rm
3872  *
3873  * Rd = Rn + LSL(extend(Rm), amount)
3874  */
3875 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3876 {
3877     int rd = extract32(insn, 0, 5);
3878     int rn = extract32(insn, 5, 5);
3879     int imm3 = extract32(insn, 10, 3);
3880     int option = extract32(insn, 13, 3);
3881     int rm = extract32(insn, 16, 5);
3882     bool setflags = extract32(insn, 29, 1);
3883     bool sub_op = extract32(insn, 30, 1);
3884     bool sf = extract32(insn, 31, 1);
3885
3886     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3887     TCGv_i64 tcg_rd;
3888     TCGv_i64 tcg_result;
3889
3890     if (imm3 > 4) {
3891         unallocated_encoding(s);
3892         return;
3893     }
3894
3895     /* non-flag setting ops may use SP */
3896     if (!setflags) {
3897         tcg_rd = cpu_reg_sp(s, rd);
3898     } else {
3899         tcg_rd = cpu_reg(s, rd);
3900     }
3901     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3902
3903     tcg_rm = read_cpu_reg(s, rm, sf);
3904     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3905
3906     tcg_result = tcg_temp_new_i64();
3907
3908     if (!setflags) {
3909         if (sub_op) {
3910             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3911         } else {
3912             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3913         }
3914     } else {
3915         if (sub_op) {
3916             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3917         } else {
3918             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3919         }
3920     }
3921
3922     if (sf) {
3923         tcg_gen_mov_i64(tcg_rd, tcg_result);
3924     } else {
3925         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3926     }
3927
3928     tcg_temp_free_i64(tcg_result);
3929 }
3930
3931 /*
3932  * Add/subtract (shifted register)
3933  *
3934  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3935  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3936  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3937  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3938  *
3939  *    sf: 0 -> 32bit, 1 -> 64bit
3940  *    op: 0 -> add  , 1 -> sub
3941  *     S: 1 -> set flags
3942  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3943  *  imm6: Shift amount to apply to Rm before the add/sub
3944  */
3945 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3946 {
3947     int rd = extract32(insn, 0, 5);
3948     int rn = extract32(insn, 5, 5);
3949     int imm6 = extract32(insn, 10, 6);
3950     int rm = extract32(insn, 16, 5);
3951     int shift_type = extract32(insn, 22, 2);
3952     bool setflags = extract32(insn, 29, 1);
3953     bool sub_op = extract32(insn, 30, 1);
3954     bool sf = extract32(insn, 31, 1);
3955
3956     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3957     TCGv_i64 tcg_rn, tcg_rm;
3958     TCGv_i64 tcg_result;
3959
3960     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3961         unallocated_encoding(s);
3962         return;
3963     }
3964
3965     tcg_rn = read_cpu_reg(s, rn, sf);
3966     tcg_rm = read_cpu_reg(s, rm, sf);
3967
3968     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3969
3970     tcg_result = tcg_temp_new_i64();
3971
3972     if (!setflags) {
3973         if (sub_op) {
3974             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3975         } else {
3976             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3977         }
3978     } else {
3979         if (sub_op) {
3980             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3981         } else {
3982             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3983         }
3984     }
3985
3986     if (sf) {
3987         tcg_gen_mov_i64(tcg_rd, tcg_result);
3988     } else {
3989         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3990     }
3991
3992     tcg_temp_free_i64(tcg_result);
3993 }
3994
3995 /* Data-processing (3 source)
3996  *
3997  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3998  *  +--+------+-----------+------+------+----+------+------+------+
3999  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
4000  *  +--+------+-----------+------+------+----+------+------+------+
4001  */
4002 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
4003 {
4004     int rd = extract32(insn, 0, 5);
4005     int rn = extract32(insn, 5, 5);
4006     int ra = extract32(insn, 10, 5);
4007     int rm = extract32(insn, 16, 5);
4008     int op_id = (extract32(insn, 29, 3) << 4) |
4009         (extract32(insn, 21, 3) << 1) |
4010         extract32(insn, 15, 1);
4011     bool sf = extract32(insn, 31, 1);
4012     bool is_sub = extract32(op_id, 0, 1);
4013     bool is_high = extract32(op_id, 2, 1);
4014     bool is_signed = false;
4015     TCGv_i64 tcg_op1;
4016     TCGv_i64 tcg_op2;
4017     TCGv_i64 tcg_tmp;
4018
4019     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
4020     switch (op_id) {
4021     case 0x42: /* SMADDL */
4022     case 0x43: /* SMSUBL */
4023     case 0x44: /* SMULH */
4024         is_signed = true;
4025         break;
4026     case 0x0: /* MADD (32bit) */
4027     case 0x1: /* MSUB (32bit) */
4028     case 0x40: /* MADD (64bit) */
4029     case 0x41: /* MSUB (64bit) */
4030     case 0x4a: /* UMADDL */
4031     case 0x4b: /* UMSUBL */
4032     case 0x4c: /* UMULH */
4033         break;
4034     default:
4035         unallocated_encoding(s);
4036         return;
4037     }
4038
4039     if (is_high) {
4040         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
4041         TCGv_i64 tcg_rd = cpu_reg(s, rd);
4042         TCGv_i64 tcg_rn = cpu_reg(s, rn);
4043         TCGv_i64 tcg_rm = cpu_reg(s, rm);
4044
4045         if (is_signed) {
4046             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4047         } else {
4048             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
4049         }
4050
4051         tcg_temp_free_i64(low_bits);
4052         return;
4053     }
4054
4055     tcg_op1 = tcg_temp_new_i64();
4056     tcg_op2 = tcg_temp_new_i64();
4057     tcg_tmp = tcg_temp_new_i64();
4058
4059     if (op_id < 0x42) {
4060         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
4061         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
4062     } else {
4063         if (is_signed) {
4064             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
4065             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
4066         } else {
4067             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
4068             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
4069         }
4070     }
4071
4072     if (ra == 31 && !is_sub) {
4073         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
4074         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
4075     } else {
4076         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
4077         if (is_sub) {
4078             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4079         } else {
4080             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
4081         }
4082     }
4083
4084     if (!sf) {
4085         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
4086     }
4087
4088     tcg_temp_free_i64(tcg_op1);
4089     tcg_temp_free_i64(tcg_op2);
4090     tcg_temp_free_i64(tcg_tmp);
4091 }
4092
4093 /* Add/subtract (with carry)
4094  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
4095  * +--+--+--+------------------------+------+---------+------+-----+
4096  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
4097  * +--+--+--+------------------------+------+---------+------+-----+
4098  *                                            [000000]
4099  */
4100
4101 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
4102 {
4103     unsigned int sf, op, setflags, rm, rn, rd;
4104     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
4105
4106     if (extract32(insn, 10, 6) != 0) {
4107         unallocated_encoding(s);
4108         return;
4109     }
4110
4111     sf = extract32(insn, 31, 1);
4112     op = extract32(insn, 30, 1);
4113     setflags = extract32(insn, 29, 1);
4114     rm = extract32(insn, 16, 5);
4115     rn = extract32(insn, 5, 5);
4116     rd = extract32(insn, 0, 5);
4117
4118     tcg_rd = cpu_reg(s, rd);
4119     tcg_rn = cpu_reg(s, rn);
4120
4121     if (op) {
4122         tcg_y = new_tmp_a64(s);
4123         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
4124     } else {
4125         tcg_y = cpu_reg(s, rm);
4126     }
4127
4128     if (setflags) {
4129         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
4130     } else {
4131         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
4132     }
4133 }
4134
4135 /* Conditional compare (immediate / register)
4136  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
4137  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4138  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
4139  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
4140  *        [1]                             y                [0]       [0]
4141  */
4142 static void disas_cc(DisasContext *s, uint32_t insn)
4143 {
4144     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
4145     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
4146     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
4147     DisasCompare c;
4148
4149     if (!extract32(insn, 29, 1)) {
4150         unallocated_encoding(s);
4151         return;
4152     }
4153     if (insn & (1 << 10 | 1 << 4)) {
4154         unallocated_encoding(s);
4155         return;
4156     }
4157     sf = extract32(insn, 31, 1);
4158     op = extract32(insn, 30, 1);
4159     is_imm = extract32(insn, 11, 1);
4160     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
4161     cond = extract32(insn, 12, 4);
4162     rn = extract32(insn, 5, 5);
4163     nzcv = extract32(insn, 0, 4);
4164
4165     /* Set T0 = !COND.  */
4166     tcg_t0 = tcg_temp_new_i32();
4167     arm_test_cc(&c, cond);
4168     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
4169     arm_free_cc(&c);
4170
4171     /* Load the arguments for the new comparison.  */
4172     if (is_imm) {
4173         tcg_y = new_tmp_a64(s);
4174         tcg_gen_movi_i64(tcg_y, y);
4175     } else {
4176         tcg_y = cpu_reg(s, y);
4177     }
4178     tcg_rn = cpu_reg(s, rn);
4179
4180     /* Set the flags for the new comparison.  */
4181     tcg_tmp = tcg_temp_new_i64();
4182     if (op) {
4183         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4184     } else {
4185         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
4186     }
4187     tcg_temp_free_i64(tcg_tmp);
4188
4189     /* If COND was false, force the flags to #nzcv.  Compute two masks
4190      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4191      * For tcg hosts that support ANDC, we can make do with just T1.
4192      * In either case, allow the tcg optimizer to delete any unused mask.
4193      */
4194     tcg_t1 = tcg_temp_new_i32();
4195     tcg_t2 = tcg_temp_new_i32();
4196     tcg_gen_neg_i32(tcg_t1, tcg_t0);
4197     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4198
4199     if (nzcv & 8) { /* N */
4200         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4201     } else {
4202         if (TCG_TARGET_HAS_andc_i32) {
4203             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4204         } else {
4205             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4206         }
4207     }
4208     if (nzcv & 4) { /* Z */
4209         if (TCG_TARGET_HAS_andc_i32) {
4210             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4211         } else {
4212             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4213         }
4214     } else {
4215         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4216     }
4217     if (nzcv & 2) { /* C */
4218         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4219     } else {
4220         if (TCG_TARGET_HAS_andc_i32) {
4221             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4222         } else {
4223             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4224         }
4225     }
4226     if (nzcv & 1) { /* V */
4227         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4228     } else {
4229         if (TCG_TARGET_HAS_andc_i32) {
4230             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4231         } else {
4232             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4233         }
4234     }
4235     tcg_temp_free_i32(tcg_t0);
4236     tcg_temp_free_i32(tcg_t1);
4237     tcg_temp_free_i32(tcg_t2);
4238 }
4239
4240 /* Conditional select
4241  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
4242  * +----+----+---+-----------------+------+------+-----+------+------+
4243  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
4244  * +----+----+---+-----------------+------+------+-----+------+------+
4245  */
4246 static void disas_cond_select(DisasContext *s, uint32_t insn)
4247 {
4248     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4249     TCGv_i64 tcg_rd, zero;
4250     DisasCompare64 c;
4251
4252     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4253         /* S == 1 or op2<1> == 1 */
4254         unallocated_encoding(s);
4255         return;
4256     }
4257     sf = extract32(insn, 31, 1);
4258     else_inv = extract32(insn, 30, 1);
4259     rm = extract32(insn, 16, 5);
4260     cond = extract32(insn, 12, 4);
4261     else_inc = extract32(insn, 10, 1);
4262     rn = extract32(insn, 5, 5);
4263     rd = extract32(insn, 0, 5);
4264
4265     tcg_rd = cpu_reg(s, rd);
4266
4267     a64_test_cc(&c, cond);
4268     zero = tcg_const_i64(0);
4269
4270     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4271         /* CSET & CSETM.  */
4272         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4273         if (else_inv) {
4274             tcg_gen_neg_i64(tcg_rd, tcg_rd);
4275         }
4276     } else {
4277         TCGv_i64 t_true = cpu_reg(s, rn);
4278         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4279         if (else_inv && else_inc) {
4280             tcg_gen_neg_i64(t_false, t_false);
4281         } else if (else_inv) {
4282             tcg_gen_not_i64(t_false, t_false);
4283         } else if (else_inc) {
4284             tcg_gen_addi_i64(t_false, t_false, 1);
4285         }
4286         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4287     }
4288
4289     tcg_temp_free_i64(zero);
4290     a64_free_cc(&c);
4291
4292     if (!sf) {
4293         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4294     }
4295 }
4296
4297 static void handle_clz(DisasContext *s, unsigned int sf,
4298                        unsigned int rn, unsigned int rd)
4299 {
4300     TCGv_i64 tcg_rd, tcg_rn;
4301     tcg_rd = cpu_reg(s, rd);
4302     tcg_rn = cpu_reg(s, rn);
4303
4304     if (sf) {
4305         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4306     } else {
4307         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4308         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4309         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4310         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4311         tcg_temp_free_i32(tcg_tmp32);
4312     }
4313 }
4314
4315 static void handle_cls(DisasContext *s, unsigned int sf,
4316                        unsigned int rn, unsigned int rd)
4317 {
4318     TCGv_i64 tcg_rd, tcg_rn;
4319     tcg_rd = cpu_reg(s, rd);
4320     tcg_rn = cpu_reg(s, rn);
4321
4322     if (sf) {
4323         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4324     } else {
4325         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4326         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4327         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4328         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4329         tcg_temp_free_i32(tcg_tmp32);
4330     }
4331 }
4332
4333 static void handle_rbit(DisasContext *s, unsigned int sf,
4334                         unsigned int rn, unsigned int rd)
4335 {
4336     TCGv_i64 tcg_rd, tcg_rn;
4337     tcg_rd = cpu_reg(s, rd);
4338     tcg_rn = cpu_reg(s, rn);
4339
4340     if (sf) {
4341         gen_helper_rbit64(tcg_rd, tcg_rn);
4342     } else {
4343         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4344         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4345         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4346         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4347         tcg_temp_free_i32(tcg_tmp32);
4348     }
4349 }
4350
4351 /* REV with sf==1, opcode==3 ("REV64") */
4352 static void handle_rev64(DisasContext *s, unsigned int sf,
4353                          unsigned int rn, unsigned int rd)
4354 {
4355     if (!sf) {
4356         unallocated_encoding(s);
4357         return;
4358     }
4359     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4360 }
4361
4362 /* REV with sf==0, opcode==2
4363  * REV32 (sf==1, opcode==2)
4364  */
4365 static void handle_rev32(DisasContext *s, unsigned int sf,
4366                          unsigned int rn, unsigned int rd)
4367 {
4368     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4369
4370     if (sf) {
4371         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4372         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4373
4374         /* bswap32_i64 requires zero high word */
4375         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4376         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4377         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4378         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4379         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4380
4381         tcg_temp_free_i64(tcg_tmp);
4382     } else {
4383         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4384         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4385     }
4386 }
4387
4388 /* REV16 (opcode==1) */
4389 static void handle_rev16(DisasContext *s, unsigned int sf,
4390                          unsigned int rn, unsigned int rd)
4391 {
4392     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4393     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4394     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4395     TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4396
4397     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4398     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4399     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4400     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4401     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4402
4403     tcg_temp_free_i64(mask);
4404     tcg_temp_free_i64(tcg_tmp);
4405 }
4406
4407 /* Data-processing (1 source)
4408  *   31  30  29  28             21 20     16 15    10 9    5 4    0
4409  * +----+---+---+-----------------+---------+--------+------+------+
4410  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4411  * +----+---+---+-----------------+---------+--------+------+------+
4412  */
4413 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4414 {
4415     unsigned int sf, opcode, rn, rd;
4416
4417     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4418         unallocated_encoding(s);
4419         return;
4420     }
4421
4422     sf = extract32(insn, 31, 1);
4423     opcode = extract32(insn, 10, 6);
4424     rn = extract32(insn, 5, 5);
4425     rd = extract32(insn, 0, 5);
4426
4427     switch (opcode) {
4428     case 0: /* RBIT */
4429         handle_rbit(s, sf, rn, rd);
4430         break;
4431     case 1: /* REV16 */
4432         handle_rev16(s, sf, rn, rd);
4433         break;
4434     case 2: /* REV32 */
4435         handle_rev32(s, sf, rn, rd);
4436         break;
4437     case 3: /* REV64 */
4438         handle_rev64(s, sf, rn, rd);
4439         break;
4440     case 4: /* CLZ */
4441         handle_clz(s, sf, rn, rd);
4442         break;
4443     case 5: /* CLS */
4444         handle_cls(s, sf, rn, rd);
4445         break;
4446     }
4447 }
4448
4449 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4450                        unsigned int rm, unsigned int rn, unsigned int rd)
4451 {
4452     TCGv_i64 tcg_n, tcg_m, tcg_rd;
4453     tcg_rd = cpu_reg(s, rd);
4454
4455     if (!sf && is_signed) {
4456         tcg_n = new_tmp_a64(s);
4457         tcg_m = new_tmp_a64(s);
4458         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4459         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4460     } else {
4461         tcg_n = read_cpu_reg(s, rn, sf);
4462         tcg_m = read_cpu_reg(s, rm, sf);
4463     }
4464
4465     if (is_signed) {
4466         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4467     } else {
4468         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4469     }
4470
4471     if (!sf) { /* zero extend final result */
4472         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4473     }
4474 }
4475
4476 /* LSLV, LSRV, ASRV, RORV */
4477 static void handle_shift_reg(DisasContext *s,
4478                              enum a64_shift_type shift_type, unsigned int sf,
4479                              unsigned int rm, unsigned int rn, unsigned int rd)
4480 {
4481     TCGv_i64 tcg_shift = tcg_temp_new_i64();
4482     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4483     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4484
4485     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4486     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4487     tcg_temp_free_i64(tcg_shift);
4488 }
4489
4490 /* CRC32[BHWX], CRC32C[BHWX] */
4491 static void handle_crc32(DisasContext *s,
4492                          unsigned int sf, unsigned int sz, bool crc32c,
4493                          unsigned int rm, unsigned int rn, unsigned int rd)
4494 {
4495     TCGv_i64 tcg_acc, tcg_val;
4496     TCGv_i32 tcg_bytes;
4497
4498     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4499         || (sf == 1 && sz != 3)
4500         || (sf == 0 && sz == 3)) {
4501         unallocated_encoding(s);
4502         return;
4503     }
4504
4505     if (sz == 3) {
4506         tcg_val = cpu_reg(s, rm);
4507     } else {
4508         uint64_t mask;
4509         switch (sz) {
4510         case 0:
4511             mask = 0xFF;
4512             break;
4513         case 1:
4514             mask = 0xFFFF;
4515             break;
4516         case 2:
4517             mask = 0xFFFFFFFF;
4518             break;
4519         default:
4520             g_assert_not_reached();
4521         }
4522         tcg_val = new_tmp_a64(s);
4523         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4524     }
4525
4526     tcg_acc = cpu_reg(s, rn);
4527     tcg_bytes = tcg_const_i32(1 << sz);
4528
4529     if (crc32c) {
4530         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4531     } else {
4532         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4533     }
4534
4535     tcg_temp_free_i32(tcg_bytes);
4536 }
4537
4538 /* Data-processing (2 source)
4539  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4540  * +----+---+---+-----------------+------+--------+------+------+
4541  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4542  * +----+---+---+-----------------+------+--------+------+------+
4543  */
4544 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4545 {
4546     unsigned int sf, rm, opcode, rn, rd;
4547     sf = extract32(insn, 31, 1);
4548     rm = extract32(insn, 16, 5);
4549     opcode = extract32(insn, 10, 6);
4550     rn = extract32(insn, 5, 5);
4551     rd = extract32(insn, 0, 5);
4552
4553     if (extract32(insn, 29, 1)) {
4554         unallocated_encoding(s);
4555         return;
4556     }
4557
4558     switch (opcode) {
4559     case 2: /* UDIV */
4560         handle_div(s, false, sf, rm, rn, rd);
4561         break;
4562     case 3: /* SDIV */
4563         handle_div(s, true, sf, rm, rn, rd);
4564         break;
4565     case 8: /* LSLV */
4566         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4567         break;
4568     case 9: /* LSRV */
4569         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4570         break;
4571     case 10: /* ASRV */
4572         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4573         break;
4574     case 11: /* RORV */
4575         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4576         break;
4577     case 16:
4578     case 17:
4579     case 18:
4580     case 19:
4581     case 20:
4582     case 21:
4583     case 22:
4584     case 23: /* CRC32 */
4585     {
4586         int sz = extract32(opcode, 0, 2);
4587         bool crc32c = extract32(opcode, 2, 1);
4588         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4589         break;
4590     }
4591     default:
4592         unallocated_encoding(s);
4593         break;
4594     }
4595 }
4596
4597 /* Data processing - register */
4598 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4599 {
4600     switch (extract32(insn, 24, 5)) {
4601     case 0x0a: /* Logical (shifted register) */
4602         disas_logic_reg(s, insn);
4603         break;
4604     case 0x0b: /* Add/subtract */
4605         if (insn & (1 << 21)) { /* (extended register) */
4606             disas_add_sub_ext_reg(s, insn);
4607         } else {
4608             disas_add_sub_reg(s, insn);
4609         }
4610         break;
4611     case 0x1b: /* Data-processing (3 source) */
4612         disas_data_proc_3src(s, insn);
4613         break;
4614     case 0x1a:
4615         switch (extract32(insn, 21, 3)) {
4616         case 0x0: /* Add/subtract (with carry) */
4617             disas_adc_sbc(s, insn);
4618             break;
4619         case 0x2: /* Conditional compare */
4620             disas_cc(s, insn); /* both imm and reg forms */
4621             break;
4622         case 0x4: /* Conditional select */
4623             disas_cond_select(s, insn);
4624             break;
4625         case 0x6: /* Data-processing */
4626             if (insn & (1 << 30)) { /* (1 source) */
4627                 disas_data_proc_1src(s, insn);
4628             } else {            /* (2 source) */
4629                 disas_data_proc_2src(s, insn);
4630             }
4631             break;
4632         default:
4633             unallocated_encoding(s);
4634             break;
4635         }
4636         break;
4637     default:
4638         unallocated_encoding(s);
4639         break;
4640     }
4641 }
4642
4643 static void handle_fp_compare(DisasContext *s, int size,
4644                               unsigned int rn, unsigned int rm,
4645                               bool cmp_with_zero, bool signal_all_nans)
4646 {
4647     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4648     TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
4649
4650     if (size == MO_64) {
4651         TCGv_i64 tcg_vn, tcg_vm;
4652
4653         tcg_vn = read_fp_dreg(s, rn);
4654         if (cmp_with_zero) {
4655             tcg_vm = tcg_const_i64(0);
4656         } else {
4657             tcg_vm = read_fp_dreg(s, rm);
4658         }
4659         if (signal_all_nans) {
4660             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4661         } else {
4662             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4663         }
4664         tcg_temp_free_i64(tcg_vn);
4665         tcg_temp_free_i64(tcg_vm);
4666     } else {
4667         TCGv_i32 tcg_vn = tcg_temp_new_i32();
4668         TCGv_i32 tcg_vm = tcg_temp_new_i32();
4669
4670         read_vec_element_i32(s, tcg_vn, rn, 0, size);
4671         if (cmp_with_zero) {
4672             tcg_gen_movi_i32(tcg_vm, 0);
4673         } else {
4674             read_vec_element_i32(s, tcg_vm, rm, 0, size);
4675         }
4676
4677         switch (size) {
4678         case MO_32:
4679             if (signal_all_nans) {
4680                 gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4681             } else {
4682                 gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4683             }
4684             break;
4685         case MO_16:
4686             if (signal_all_nans) {
4687                 gen_helper_vfp_cmpeh_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4688             } else {
4689                 gen_helper_vfp_cmph_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4690             }
4691             break;
4692         default:
4693             g_assert_not_reached();
4694         }
4695
4696         tcg_temp_free_i32(tcg_vn);
4697         tcg_temp_free_i32(tcg_vm);
4698     }
4699
4700     tcg_temp_free_ptr(fpst);
4701
4702     gen_set_nzcv(tcg_flags);
4703
4704     tcg_temp_free_i64(tcg_flags);
4705 }
4706
4707 /* Floating point compare
4708  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4709  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4710  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4711  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4712  */
4713 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4714 {
4715     unsigned int mos, type, rm, op, rn, opc, op2r;
4716     int size;
4717
4718     mos = extract32(insn, 29, 3);
4719     type = extract32(insn, 22, 2);
4720     rm = extract32(insn, 16, 5);
4721     op = extract32(insn, 14, 2);
4722     rn = extract32(insn, 5, 5);
4723     opc = extract32(insn, 3, 2);
4724     op2r = extract32(insn, 0, 3);
4725
4726     if (mos || op || op2r) {
4727         unallocated_encoding(s);
4728         return;
4729     }
4730
4731     switch (type) {
4732     case 0:
4733         size = MO_32;
4734         break;
4735     case 1:
4736         size = MO_64;
4737         break;
4738     case 3:
4739         size = MO_16;
4740         if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
4741             break;
4742         }
4743         /* fallthru */
4744     default:
4745         unallocated_encoding(s);
4746         return;
4747     }
4748
4749     if (!fp_access_check(s)) {
4750         return;
4751     }
4752
4753     handle_fp_compare(s, size, rn, rm, opc & 1, opc & 2);
4754 }
4755
4756 /* Floating point conditional compare
4757  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4758  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4759  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4760  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4761  */
4762 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4763 {
4764     unsigned int mos, type, rm, cond, rn, op, nzcv;
4765     TCGv_i64 tcg_flags;
4766     TCGLabel *label_continue = NULL;
4767     int size;
4768
4769     mos = extract32(insn, 29, 3);
4770     type = extract32(insn, 22, 2);
4771     rm = extract32(insn, 16, 5);
4772     cond = extract32(insn, 12, 4);
4773     rn = extract32(insn, 5, 5);
4774     op = extract32(insn, 4, 1);
4775     nzcv = extract32(insn, 0, 4);
4776
4777     if (mos) {
4778         unallocated_encoding(s);
4779         return;
4780     }
4781
4782     switch (type) {
4783     case 0:
4784         size = MO_32;
4785         break;
4786     case 1:
4787         size = MO_64;
4788         break;
4789     case 3:
4790         size = MO_16;
4791         if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
4792             break;
4793         }
4794         /* fallthru */
4795     default:
4796         unallocated_encoding(s);
4797         return;
4798     }
4799
4800     if (!fp_access_check(s)) {
4801         return;
4802     }
4803
4804     if (cond < 0x0e) { /* not always */
4805         TCGLabel *label_match = gen_new_label();
4806         label_continue = gen_new_label();
4807         arm_gen_test_cc(cond, label_match);
4808         /* nomatch: */
4809         tcg_flags = tcg_const_i64(nzcv << 28);
4810         gen_set_nzcv(tcg_flags);
4811         tcg_temp_free_i64(tcg_flags);
4812         tcg_gen_br(label_continue);
4813         gen_set_label(label_match);
4814     }
4815
4816     handle_fp_compare(s, size, rn, rm, false, op);
4817
4818     if (cond < 0x0e) {
4819         gen_set_label(label_continue);
4820     }
4821 }
4822
4823 /* Floating point conditional select
4824  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4825  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4826  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4827  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4828  */
4829 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4830 {
4831     unsigned int mos, type, rm, cond, rn, rd;
4832     TCGv_i64 t_true, t_false, t_zero;
4833     DisasCompare64 c;
4834     TCGMemOp sz;
4835
4836     mos = extract32(insn, 29, 3);
4837     type = extract32(insn, 22, 2);
4838     rm = extract32(insn, 16, 5);
4839     cond = extract32(insn, 12, 4);
4840     rn = extract32(insn, 5, 5);
4841     rd = extract32(insn, 0, 5);
4842
4843     if (mos) {
4844         unallocated_encoding(s);
4845         return;
4846     }
4847
4848     switch (type) {
4849     case 0:
4850         sz = MO_32;
4851         break;
4852     case 1:
4853         sz = MO_64;
4854         break;
4855     case 3:
4856         sz = MO_16;
4857         if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
4858             break;
4859         }
4860         /* fallthru */
4861     default:
4862         unallocated_encoding(s);
4863         return;
4864     }
4865
4866     if (!fp_access_check(s)) {
4867         return;
4868     }
4869
4870     /* Zero extend sreg & hreg inputs to 64 bits now.  */
4871     t_true = tcg_temp_new_i64();
4872     t_false = tcg_temp_new_i64();
4873     read_vec_element(s, t_true, rn, 0, sz);
4874     read_vec_element(s, t_false, rm, 0, sz);
4875
4876     a64_test_cc(&c, cond);
4877     t_zero = tcg_const_i64(0);
4878     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4879     tcg_temp_free_i64(t_zero);
4880     tcg_temp_free_i64(t_false);
4881     a64_free_cc(&c);
4882
4883     /* Note that sregs & hregs write back zeros to the high bits,
4884        and we've already done the zero-extension.  */
4885     write_fp_dreg(s, rd, t_true);
4886     tcg_temp_free_i64(t_true);
4887 }
4888
4889 /* Floating-point data-processing (1 source) - half precision */
4890 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
4891 {
4892     TCGv_ptr fpst = NULL;
4893     TCGv_i32 tcg_op = read_fp_hreg(s, rn);
4894     TCGv_i32 tcg_res = tcg_temp_new_i32();
4895
4896     switch (opcode) {
4897     case 0x0: /* FMOV */
4898         tcg_gen_mov_i32(tcg_res, tcg_op);
4899         break;
4900     case 0x1: /* FABS */
4901         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
4902         break;
4903     case 0x2: /* FNEG */
4904         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
4905         break;
4906     case 0x3: /* FSQRT */
4907         fpst = get_fpstatus_ptr(true);
4908         gen_helper_sqrt_f16(tcg_res, tcg_op, fpst);
4909         break;
4910     case 0x8: /* FRINTN */
4911     case 0x9: /* FRINTP */
4912     case 0xa: /* FRINTM */
4913     case 0xb: /* FRINTZ */
4914     case 0xc: /* FRINTA */
4915     {
4916         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4917         fpst = get_fpstatus_ptr(true);
4918
4919         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4920         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
4921
4922         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4923         tcg_temp_free_i32(tcg_rmode);
4924         break;
4925     }
4926     case 0xe: /* FRINTX */
4927         fpst = get_fpstatus_ptr(true);
4928         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
4929         break;
4930     case 0xf: /* FRINTI */
4931         fpst = get_fpstatus_ptr(true);
4932         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
4933         break;
4934     default:
4935         abort();
4936     }
4937
4938     write_fp_sreg(s, rd, tcg_res);
4939
4940     if (fpst) {
4941         tcg_temp_free_ptr(fpst);
4942     }
4943     tcg_temp_free_i32(tcg_op);
4944     tcg_temp_free_i32(tcg_res);
4945 }
4946
4947 /* Floating-point data-processing (1 source) - single precision */
4948 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4949 {
4950     TCGv_ptr fpst;
4951     TCGv_i32 tcg_op;
4952     TCGv_i32 tcg_res;
4953
4954     fpst = get_fpstatus_ptr(false);
4955     tcg_op = read_fp_sreg(s, rn);
4956     tcg_res = tcg_temp_new_i32();
4957
4958     switch (opcode) {
4959     case 0x0: /* FMOV */
4960         tcg_gen_mov_i32(tcg_res, tcg_op);
4961         break;
4962     case 0x1: /* FABS */
4963         gen_helper_vfp_abss(tcg_res, tcg_op);
4964         break;
4965     case 0x2: /* FNEG */
4966         gen_helper_vfp_negs(tcg_res, tcg_op);
4967         break;
4968     case 0x3: /* FSQRT */
4969         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4970         break;
4971     case 0x8: /* FRINTN */
4972     case 0x9: /* FRINTP */
4973     case 0xa: /* FRINTM */
4974     case 0xb: /* FRINTZ */
4975     case 0xc: /* FRINTA */
4976     {
4977         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4978
4979         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4980         gen_helper_rints(tcg_res, tcg_op, fpst);
4981
4982         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4983         tcg_temp_free_i32(tcg_rmode);
4984         break;
4985     }
4986     case 0xe: /* FRINTX */
4987         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4988         break;
4989     case 0xf: /* FRINTI */
4990         gen_helper_rints(tcg_res, tcg_op, fpst);
4991         break;
4992     default:
4993         abort();
4994     }
4995
4996     write_fp_sreg(s, rd, tcg_res);
4997
4998     tcg_temp_free_ptr(fpst);
4999     tcg_temp_free_i32(tcg_op);
5000     tcg_temp_free_i32(tcg_res);
5001 }
5002
5003 /* Floating-point data-processing (1 source) - double precision */
5004 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
5005 {
5006     TCGv_ptr fpst;
5007     TCGv_i64 tcg_op;
5008     TCGv_i64 tcg_res;
5009
5010     switch (opcode) {
5011     case 0x0: /* FMOV */
5012         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
5013         return;
5014     }
5015
5016     fpst = get_fpstatus_ptr(false);
5017     tcg_op = read_fp_dreg(s, rn);
5018     tcg_res = tcg_temp_new_i64();
5019
5020     switch (opcode) {
5021     case 0x1: /* FABS */
5022         gen_helper_vfp_absd(tcg_res, tcg_op);
5023         break;
5024     case 0x2: /* FNEG */
5025         gen_helper_vfp_negd(tcg_res, tcg_op);
5026         break;
5027     case 0x3: /* FSQRT */
5028         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
5029         break;
5030     case 0x8: /* FRINTN */
5031     case 0x9: /* FRINTP */
5032     case 0xa: /* FRINTM */
5033     case 0xb: /* FRINTZ */
5034     case 0xc: /* FRINTA */
5035     {
5036         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
5037
5038         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5039         gen_helper_rintd(tcg_res, tcg_op, fpst);
5040
5041         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
5042         tcg_temp_free_i32(tcg_rmode);
5043         break;
5044     }
5045     case 0xe: /* FRINTX */
5046         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
5047         break;
5048     case 0xf: /* FRINTI */
5049         gen_helper_rintd(tcg_res, tcg_op, fpst);
5050         break;
5051     default:
5052         abort();
5053     }
5054
5055     write_fp_dreg(s, rd, tcg_res);
5056
5057     tcg_temp_free_ptr(fpst);
5058     tcg_temp_free_i64(tcg_op);
5059     tcg_temp_free_i64(tcg_res);
5060 }
5061
5062 static void handle_fp_fcvt(DisasContext *s, int opcode,
5063                            int rd, int rn, int dtype, int ntype)
5064 {
5065     switch (ntype) {
5066     case 0x0:
5067     {
5068         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5069         if (dtype == 1) {
5070             /* Single to double */
5071             TCGv_i64 tcg_rd = tcg_temp_new_i64();
5072             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
5073             write_fp_dreg(s, rd, tcg_rd);
5074             tcg_temp_free_i64(tcg_rd);
5075         } else {
5076             /* Single to half */
5077             TCGv_i32 tcg_rd = tcg_temp_new_i32();
5078             TCGv_i32 ahp = get_ahp_flag();
5079             TCGv_ptr fpst = get_fpstatus_ptr(false);
5080
5081             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5082             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5083             write_fp_sreg(s, rd, tcg_rd);
5084             tcg_temp_free_i32(tcg_rd);
5085             tcg_temp_free_i32(ahp);
5086             tcg_temp_free_ptr(fpst);
5087         }
5088         tcg_temp_free_i32(tcg_rn);
5089         break;
5090     }
5091     case 0x1:
5092     {
5093         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
5094         TCGv_i32 tcg_rd = tcg_temp_new_i32();
5095         if (dtype == 0) {
5096             /* Double to single */
5097             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
5098         } else {
5099             TCGv_ptr fpst = get_fpstatus_ptr(false);
5100             TCGv_i32 ahp = get_ahp_flag();
5101             /* Double to half */
5102             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
5103             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
5104             tcg_temp_free_ptr(fpst);
5105             tcg_temp_free_i32(ahp);
5106         }
5107         write_fp_sreg(s, rd, tcg_rd);
5108         tcg_temp_free_i32(tcg_rd);
5109         tcg_temp_free_i64(tcg_rn);
5110         break;
5111     }
5112     case 0x3:
5113     {
5114         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
5115         TCGv_ptr tcg_fpst = get_fpstatus_ptr(false);
5116         TCGv_i32 tcg_ahp = get_ahp_flag();
5117         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
5118         if (dtype == 0) {
5119             /* Half to single */
5120             TCGv_i32 tcg_rd = tcg_temp_new_i32();
5121             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5122             write_fp_sreg(s, rd, tcg_rd);
5123             tcg_temp_free_ptr(tcg_fpst);
5124             tcg_temp_free_i32(tcg_ahp);
5125             tcg_temp_free_i32(tcg_rd);
5126         } else {
5127             /* Half to double */
5128             TCGv_i64 tcg_rd = tcg_temp_new_i64();
5129             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
5130             write_fp_dreg(s, rd, tcg_rd);
5131             tcg_temp_free_i64(tcg_rd);
5132         }
5133         tcg_temp_free_i32(tcg_rn);
5134         break;
5135     }
5136     default:
5137         abort();
5138     }
5139 }
5140
5141 /* Floating point data-processing (1 source)
5142  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
5143  * +---+---+---+-----------+------+---+--------+-----------+------+------+
5144  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
5145  * +---+---+---+-----------+------+---+--------+-----------+------+------+
5146  */
5147 static void disas_fp_1src(DisasContext *s, uint32_t insn)
5148 {
5149     int type = extract32(insn, 22, 2);
5150     int opcode = extract32(insn, 15, 6);
5151     int rn = extract32(insn, 5, 5);
5152     int rd = extract32(insn, 0, 5);
5153
5154     switch (opcode) {
5155     case 0x4: case 0x5: case 0x7:
5156     {
5157         /* FCVT between half, single and double precision */
5158         int dtype = extract32(opcode, 0, 2);
5159         if (type == 2 || dtype == type) {
5160             unallocated_encoding(s);
5161             return;
5162         }
5163         if (!fp_access_check(s)) {
5164             return;
5165         }
5166
5167         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
5168         break;
5169     }
5170     case 0x0 ... 0x3:
5171     case 0x8 ... 0xc:
5172     case 0xe ... 0xf:
5173         /* 32-to-32 and 64-to-64 ops */
5174         switch (type) {
5175         case 0:
5176             if (!fp_access_check(s)) {
5177                 return;
5178             }
5179
5180             handle_fp_1src_single(s, opcode, rd, rn);
5181             break;
5182         case 1:
5183             if (!fp_access_check(s)) {
5184                 return;
5185             }
5186
5187             handle_fp_1src_double(s, opcode, rd, rn);
5188             break;
5189         case 3:
5190             if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5191                 unallocated_encoding(s);
5192                 return;
5193             }
5194
5195             if (!fp_access_check(s)) {
5196                 return;
5197             }
5198
5199             handle_fp_1src_half(s, opcode, rd, rn);
5200             break;
5201         default:
5202             unallocated_encoding(s);
5203         }
5204         break;
5205     default:
5206         unallocated_encoding(s);
5207         break;
5208     }
5209 }
5210
5211 /* Floating-point data-processing (2 source) - single precision */
5212 static void handle_fp_2src_single(DisasContext *s, int opcode,
5213                                   int rd, int rn, int rm)
5214 {
5215     TCGv_i32 tcg_op1;
5216     TCGv_i32 tcg_op2;
5217     TCGv_i32 tcg_res;
5218     TCGv_ptr fpst;
5219
5220     tcg_res = tcg_temp_new_i32();
5221     fpst = get_fpstatus_ptr(false);
5222     tcg_op1 = read_fp_sreg(s, rn);
5223     tcg_op2 = read_fp_sreg(s, rm);
5224
5225     switch (opcode) {
5226     case 0x0: /* FMUL */
5227         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5228         break;
5229     case 0x1: /* FDIV */
5230         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
5231         break;
5232     case 0x2: /* FADD */
5233         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
5234         break;
5235     case 0x3: /* FSUB */
5236         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
5237         break;
5238     case 0x4: /* FMAX */
5239         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
5240         break;
5241     case 0x5: /* FMIN */
5242         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
5243         break;
5244     case 0x6: /* FMAXNM */
5245         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
5246         break;
5247     case 0x7: /* FMINNM */
5248         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
5249         break;
5250     case 0x8: /* FNMUL */
5251         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
5252         gen_helper_vfp_negs(tcg_res, tcg_res);
5253         break;
5254     }
5255
5256     write_fp_sreg(s, rd, tcg_res);
5257
5258     tcg_temp_free_ptr(fpst);
5259     tcg_temp_free_i32(tcg_op1);
5260     tcg_temp_free_i32(tcg_op2);
5261     tcg_temp_free_i32(tcg_res);
5262 }
5263
5264 /* Floating-point data-processing (2 source) - double precision */
5265 static void handle_fp_2src_double(DisasContext *s, int opcode,
5266                                   int rd, int rn, int rm)
5267 {
5268     TCGv_i64 tcg_op1;
5269     TCGv_i64 tcg_op2;
5270     TCGv_i64 tcg_res;
5271     TCGv_ptr fpst;
5272
5273     tcg_res = tcg_temp_new_i64();
5274     fpst = get_fpstatus_ptr(false);
5275     tcg_op1 = read_fp_dreg(s, rn);
5276     tcg_op2 = read_fp_dreg(s, rm);
5277
5278     switch (opcode) {
5279     case 0x0: /* FMUL */
5280         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5281         break;
5282     case 0x1: /* FDIV */
5283         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
5284         break;
5285     case 0x2: /* FADD */
5286         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
5287         break;
5288     case 0x3: /* FSUB */
5289         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
5290         break;
5291     case 0x4: /* FMAX */
5292         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
5293         break;
5294     case 0x5: /* FMIN */
5295         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
5296         break;
5297     case 0x6: /* FMAXNM */
5298         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5299         break;
5300     case 0x7: /* FMINNM */
5301         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5302         break;
5303     case 0x8: /* FNMUL */
5304         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5305         gen_helper_vfp_negd(tcg_res, tcg_res);
5306         break;
5307     }
5308
5309     write_fp_dreg(s, rd, tcg_res);
5310
5311     tcg_temp_free_ptr(fpst);
5312     tcg_temp_free_i64(tcg_op1);
5313     tcg_temp_free_i64(tcg_op2);
5314     tcg_temp_free_i64(tcg_res);
5315 }
5316
5317 /* Floating-point data-processing (2 source) - half precision */
5318 static void handle_fp_2src_half(DisasContext *s, int opcode,
5319                                 int rd, int rn, int rm)
5320 {
5321     TCGv_i32 tcg_op1;
5322     TCGv_i32 tcg_op2;
5323     TCGv_i32 tcg_res;
5324     TCGv_ptr fpst;
5325
5326     tcg_res = tcg_temp_new_i32();
5327     fpst = get_fpstatus_ptr(true);
5328     tcg_op1 = read_fp_hreg(s, rn);
5329     tcg_op2 = read_fp_hreg(s, rm);
5330
5331     switch (opcode) {
5332     case 0x0: /* FMUL */
5333         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
5334         break;
5335     case 0x1: /* FDIV */
5336         gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
5337         break;
5338     case 0x2: /* FADD */
5339         gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
5340         break;
5341     case 0x3: /* FSUB */
5342         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
5343         break;
5344     case 0x4: /* FMAX */
5345         gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
5346         break;
5347     case 0x5: /* FMIN */
5348         gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
5349         break;
5350     case 0x6: /* FMAXNM */
5351         gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
5352         break;
5353     case 0x7: /* FMINNM */
5354         gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
5355         break;
5356     case 0x8: /* FNMUL */
5357         gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
5358         tcg_gen_xori_i32(tcg_res, tcg_res, 0x8000);
5359         break;
5360     default:
5361         g_assert_not_reached();
5362     }
5363
5364     write_fp_sreg(s, rd, tcg_res);
5365
5366     tcg_temp_free_ptr(fpst);
5367     tcg_temp_free_i32(tcg_op1);
5368     tcg_temp_free_i32(tcg_op2);
5369     tcg_temp_free_i32(tcg_res);
5370 }
5371
5372 /* Floating point data-processing (2 source)
5373  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
5374  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
5375  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
5376  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
5377  */
5378 static void disas_fp_2src(DisasContext *s, uint32_t insn)
5379 {
5380     int type = extract32(insn, 22, 2);
5381     int rd = extract32(insn, 0, 5);
5382     int rn = extract32(insn, 5, 5);
5383     int rm = extract32(insn, 16, 5);
5384     int opcode = extract32(insn, 12, 4);
5385
5386     if (opcode > 8) {
5387         unallocated_encoding(s);
5388         return;
5389     }
5390
5391     switch (type) {
5392     case 0:
5393         if (!fp_access_check(s)) {
5394             return;
5395         }
5396         handle_fp_2src_single(s, opcode, rd, rn, rm);
5397         break;
5398     case 1:
5399         if (!fp_access_check(s)) {
5400             return;
5401         }
5402         handle_fp_2src_double(s, opcode, rd, rn, rm);
5403         break;
5404     case 3:
5405         if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5406             unallocated_encoding(s);
5407             return;
5408         }
5409         if (!fp_access_check(s)) {
5410             return;
5411         }
5412         handle_fp_2src_half(s, opcode, rd, rn, rm);
5413         break;
5414     default:
5415         unallocated_encoding(s);
5416     }
5417 }
5418
5419 /* Floating-point data-processing (3 source) - single precision */
5420 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
5421                                   int rd, int rn, int rm, int ra)
5422 {
5423     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
5424     TCGv_i32 tcg_res = tcg_temp_new_i32();
5425     TCGv_ptr fpst = get_fpstatus_ptr(false);
5426
5427     tcg_op1 = read_fp_sreg(s, rn);
5428     tcg_op2 = read_fp_sreg(s, rm);
5429     tcg_op3 = read_fp_sreg(s, ra);
5430
5431     /* These are fused multiply-add, and must be done as one
5432      * floating point operation with no rounding between the
5433      * multiplication and addition steps.
5434      * NB that doing the negations here as separate steps is
5435      * correct : an input NaN should come out with its sign bit
5436      * flipped if it is a negated-input.
5437      */
5438     if (o1 == true) {
5439         gen_helper_vfp_negs(tcg_op3, tcg_op3);
5440     }
5441
5442     if (o0 != o1) {
5443         gen_helper_vfp_negs(tcg_op1, tcg_op1);
5444     }
5445
5446     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5447
5448     write_fp_sreg(s, rd, tcg_res);
5449
5450     tcg_temp_free_ptr(fpst);
5451     tcg_temp_free_i32(tcg_op1);
5452     tcg_temp_free_i32(tcg_op2);
5453     tcg_temp_free_i32(tcg_op3);
5454     tcg_temp_free_i32(tcg_res);
5455 }
5456
5457 /* Floating-point data-processing (3 source) - double precision */
5458 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
5459                                   int rd, int rn, int rm, int ra)
5460 {
5461     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
5462     TCGv_i64 tcg_res = tcg_temp_new_i64();
5463     TCGv_ptr fpst = get_fpstatus_ptr(false);
5464
5465     tcg_op1 = read_fp_dreg(s, rn);
5466     tcg_op2 = read_fp_dreg(s, rm);
5467     tcg_op3 = read_fp_dreg(s, ra);
5468
5469     /* These are fused multiply-add, and must be done as one
5470      * floating point operation with no rounding between the
5471      * multiplication and addition steps.
5472      * NB that doing the negations here as separate steps is
5473      * correct : an input NaN should come out with its sign bit
5474      * flipped if it is a negated-input.
5475      */
5476     if (o1 == true) {
5477         gen_helper_vfp_negd(tcg_op3, tcg_op3);
5478     }
5479
5480     if (o0 != o1) {
5481         gen_helper_vfp_negd(tcg_op1, tcg_op1);
5482     }
5483
5484     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5485
5486     write_fp_dreg(s, rd, tcg_res);
5487
5488     tcg_temp_free_ptr(fpst);
5489     tcg_temp_free_i64(tcg_op1);
5490     tcg_temp_free_i64(tcg_op2);
5491     tcg_temp_free_i64(tcg_op3);
5492     tcg_temp_free_i64(tcg_res);
5493 }
5494
5495 /* Floating-point data-processing (3 source) - half precision */
5496 static void handle_fp_3src_half(DisasContext *s, bool o0, bool o1,
5497                                 int rd, int rn, int rm, int ra)
5498 {
5499     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
5500     TCGv_i32 tcg_res = tcg_temp_new_i32();
5501     TCGv_ptr fpst = get_fpstatus_ptr(true);
5502
5503     tcg_op1 = read_fp_hreg(s, rn);
5504     tcg_op2 = read_fp_hreg(s, rm);
5505     tcg_op3 = read_fp_hreg(s, ra);
5506
5507     /* These are fused multiply-add, and must be done as one
5508      * floating point operation with no rounding between the
5509      * multiplication and addition steps.
5510      * NB that doing the negations here as separate steps is
5511      * correct : an input NaN should come out with its sign bit
5512      * flipped if it is a negated-input.
5513      */
5514     if (o1 == true) {
5515         tcg_gen_xori_i32(tcg_op3, tcg_op3, 0x8000);
5516     }
5517
5518     if (o0 != o1) {
5519         tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
5520     }
5521
5522     gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5523
5524     write_fp_sreg(s, rd, tcg_res);
5525
5526     tcg_temp_free_ptr(fpst);
5527     tcg_temp_free_i32(tcg_op1);
5528     tcg_temp_free_i32(tcg_op2);
5529     tcg_temp_free_i32(tcg_op3);
5530     tcg_temp_free_i32(tcg_res);
5531 }
5532
5533 /* Floating point data-processing (3 source)
5534  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
5535  * +---+---+---+-----------+------+----+------+----+------+------+------+
5536  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
5537  * +---+---+---+-----------+------+----+------+----+------+------+------+
5538  */
5539 static void disas_fp_3src(DisasContext *s, uint32_t insn)
5540 {
5541     int type = extract32(insn, 22, 2);
5542     int rd = extract32(insn, 0, 5);
5543     int rn = extract32(insn, 5, 5);
5544     int ra = extract32(insn, 10, 5);
5545     int rm = extract32(insn, 16, 5);
5546     bool o0 = extract32(insn, 15, 1);
5547     bool o1 = extract32(insn, 21, 1);
5548
5549     switch (type) {
5550     case 0:
5551         if (!fp_access_check(s)) {
5552             return;
5553         }
5554         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
5555         break;
5556     case 1:
5557         if (!fp_access_check(s)) {
5558             return;
5559         }
5560         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
5561         break;
5562     case 3:
5563         if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5564             unallocated_encoding(s);
5565             return;
5566         }
5567         if (!fp_access_check(s)) {
5568             return;
5569         }
5570         handle_fp_3src_half(s, o0, o1, rd, rn, rm, ra);
5571         break;
5572     default:
5573         unallocated_encoding(s);
5574     }
5575 }
5576
5577 /* The imm8 encodes the sign bit, enough bits to represent an exponent in
5578  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
5579  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
5580  */
5581 uint64_t vfp_expand_imm(int size, uint8_t imm8)
5582 {
5583     uint64_t imm;
5584
5585     switch (size) {
5586     case MO_64:
5587         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5588             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5589             extract32(imm8, 0, 6);
5590         imm <<= 48;
5591         break;
5592     case MO_32:
5593         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5594             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5595             (extract32(imm8, 0, 6) << 3);
5596         imm <<= 16;
5597         break;
5598     case MO_16:
5599         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5600             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
5601             (extract32(imm8, 0, 6) << 6);
5602         break;
5603     default:
5604         g_assert_not_reached();
5605     }
5606     return imm;
5607 }
5608
5609 /* Floating point immediate
5610  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
5611  * +---+---+---+-----------+------+---+------------+-------+------+------+
5612  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
5613  * +---+---+---+-----------+------+---+------------+-------+------+------+
5614  */
5615 static void disas_fp_imm(DisasContext *s, uint32_t insn)
5616 {
5617     int rd = extract32(insn, 0, 5);
5618     int imm8 = extract32(insn, 13, 8);
5619     int type = extract32(insn, 22, 2);
5620     uint64_t imm;
5621     TCGv_i64 tcg_res;
5622     TCGMemOp sz;
5623
5624     switch (type) {
5625     case 0:
5626         sz = MO_32;
5627         break;
5628     case 1:
5629         sz = MO_64;
5630         break;
5631     case 3:
5632         sz = MO_16;
5633         if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5634             break;
5635         }
5636         /* fallthru */
5637     default:
5638         unallocated_encoding(s);
5639         return;
5640     }
5641
5642     if (!fp_access_check(s)) {
5643         return;
5644     }
5645
5646     imm = vfp_expand_imm(sz, imm8);
5647
5648     tcg_res = tcg_const_i64(imm);
5649     write_fp_dreg(s, rd, tcg_res);
5650     tcg_temp_free_i64(tcg_res);
5651 }
5652
/* Handle floating point <=> fixed point conversions. Note that we can
 * also deal with fp <=> integer conversions as a special case (scale == 64)
 * OPTME: consider handling that special case specially or at least skipping
 * the call to scalbn in the helpers for zero shifts.
 *
 * Parameters:
 *   rd, rn  - destination and source register numbers.  For itof the
 *             source is general purpose register rn and the destination
 *             is FP register rd; otherwise the source is FP register rn
 *             and the destination is general purpose register rd.
 *   opcode  - bit 0 clear selects the signed helpers, set selects the
 *             unsigned ones.  In the fp -> int direction bit 2 set
 *             forces FPROUNDING_TIEAWAY regardless of 'rmode'.
 *   itof    - true for integer -> fp, false for fp -> integer.
 *   rmode   - rounding mode for the fp -> int direction; not used when
 *             itof is true.
 *   scale   - the helpers are passed (64 - scale) as their shift operand.
 *   sf      - nonzero for a 64-bit integer operand, zero for 32-bit.
 *   type    - 0 = float32, 1 = float64, 3 = float16; any other value
 *             hits g_assert_not_reached().
 */
static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
                           bool itof, int rmode, int scale, int sf, int type)
{
    bool is_signed = !(opcode & 1);
    TCGv_ptr tcg_fpstatus;
    TCGv_i32 tcg_shift, tcg_single;
    TCGv_i64 tcg_double;

    /* float16 (type 3) asks for the status pointer with 'true', the same
     * argument the half-precision 3-source handler uses.
     */
    tcg_fpstatus = get_fpstatus_ptr(type == 3);

    tcg_shift = tcg_const_i32(64 - scale);

    if (itof) {
        TCGv_i64 tcg_int = cpu_reg(s, rn);
        if (!sf) {
            /* 32-bit source: widen to 64 bits according to signedness so
             * the 64-bit input helpers below can be used for both sizes.
             * NOTE(review): tcg_extend comes from new_tmp_a64() and is not
             * freed here - presumably released elsewhere; confirm.
             */
            TCGv_i64 tcg_extend = new_tmp_a64(s);

            if (is_signed) {
                tcg_gen_ext32s_i64(tcg_extend, tcg_int);
            } else {
                tcg_gen_ext32u_i64(tcg_extend, tcg_int);
            }

            tcg_int = tcg_extend;
        }

        switch (type) {
        case 1: /* float64 */
            tcg_double = tcg_temp_new_i64();
            if (is_signed) {
                gen_helper_vfp_sqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtod(tcg_double, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_dreg(s, rd, tcg_double);
            tcg_temp_free_i64(tcg_double);
            break;

        case 0: /* float32 */
            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtos(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
            break;

        case 3: /* float16 */
            /* The half-precision result is carried in a 32-bit temp and
             * stored with write_fp_sreg(), as for float32.
             */
            tcg_single = tcg_temp_new_i32();
            if (is_signed) {
                gen_helper_vfp_sqtoh(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            } else {
                gen_helper_vfp_uqtoh(tcg_single, tcg_int,
                                     tcg_shift, tcg_fpstatus);
            }
            write_fp_sreg(s, rd, tcg_single);
            tcg_temp_free_i32(tcg_single);
            break;

        default:
            g_assert_not_reached();
        }
    } else {
        TCGv_i64 tcg_int = cpu_reg(s, rd);
        TCGv_i32 tcg_rmode;

        if (extract32(opcode, 2, 1)) {
            /* There are too many rounding modes to all fit into rmode,
             * so FCVTA[US] is a special case.
             */
            rmode = FPROUNDING_TIEAWAY;
        }

        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));

        /* The helper writes its result back into tcg_rmode; the matching
         * call after the switch passes that value in again.
         * NOTE(review): presumably the helper returns the previous
         * rounding mode so the second call restores it - confirm.
         */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);

        switch (type) {
        case 1: /* float64 */
            tcg_double = read_fp_dreg(s, rn);
            if (is_signed) {
                if (!sf) {
                    gen_helper_vfp_tosld(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_tosqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                if (!sf) {
                    gen_helper_vfp_tould(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqd(tcg_int, tcg_double,
                                         tcg_shift, tcg_fpstatus);
                }
            }
            if (!sf) {
                /* 32-bit destination: clear the upper half of the 64-bit
                 * value the helper produced.
                 */
                tcg_gen_ext32u_i64(tcg_int, tcg_int);
            }
            tcg_temp_free_i64(tcg_double);
            break;

        case 0: /* float32 */
            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqs(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* 32-bit destination: the helper produces a 32-bit value,
                 * which is then zero-extended into the 64-bit register.
                 */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_tosls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touls(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
            break;

        case 3: /* float16 */
            /* The half-precision source is fetched with read_fp_sreg(),
             * i.e. as a 32-bit read of the register.
             */
            tcg_single = read_fp_sreg(s, rn);
            if (sf) {
                if (is_signed) {
                    gen_helper_vfp_tosqh(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_touqh(tcg_int, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
            } else {
                /* Same 32-bit destination handling as for float32. */
                TCGv_i32 tcg_dest = tcg_temp_new_i32();
                if (is_signed) {
                    gen_helper_vfp_toslh(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                } else {
                    gen_helper_vfp_toulh(tcg_dest, tcg_single,
                                         tcg_shift, tcg_fpstatus);
                }
                tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
                tcg_temp_free_i32(tcg_dest);
            }
            tcg_temp_free_i32(tcg_single);
            break;

        default:
            g_assert_not_reached();
        }

        /* Second half of the set_rmode pair started before the switch. */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
        tcg_temp_free_i32(tcg_rmode);
    }

    tcg_temp_free_ptr(tcg_fpstatus);
    tcg_temp_free_i32(tcg_shift);
}
5829
5830 /* Floating point <-> fixed point conversions
5831  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5832  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5833  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5834  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5835  */
5836 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5837 {
5838     int rd = extract32(insn, 0, 5);
5839     int rn = extract32(insn, 5, 5);
5840     int scale = extract32(insn, 10, 6);
5841     int opcode = extract32(insn, 16, 3);
5842     int rmode = extract32(insn, 19, 2);
5843     int type = extract32(insn, 22, 2);
5844     bool sbit = extract32(insn, 29, 1);
5845     bool sf = extract32(insn, 31, 1);
5846     bool itof;
5847
5848     if (sbit || (!sf && scale < 32)) {
5849         unallocated_encoding(s);
5850         return;
5851     }
5852
5853     switch (type) {
5854     case 0: /* float32 */
5855     case 1: /* float64 */
5856         break;
5857     case 3: /* float16 */
5858         if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5859             break;
5860         }
5861         /* fallthru */
5862     default:
5863         unallocated_encoding(s);
5864         return;
5865     }
5866
5867     switch ((rmode << 3) | opcode) {
5868     case 0x2: /* SCVTF */
5869     case 0x3: /* UCVTF */
5870         itof = true;
5871         break;
5872     case 0x18: /* FCVTZS */
5873     case 0x19: /* FCVTZU */
5874         itof = false;
5875         break;
5876     default:
5877         unallocated_encoding(s);
5878         return;
5879     }
5880
5881     if (!fp_access_check(s)) {
5882         return;
5883     }
5884
5885     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5886 }
5887
5888 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5889 {
5890     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5891      * without conversion.
5892      */
5893
5894     if (itof) {
5895         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5896         TCGv_i64 tmp;
5897
5898         switch (type) {
5899         case 0:
5900             /* 32 bit */
5901             tmp = tcg_temp_new_i64();
5902             tcg_gen_ext32u_i64(tmp, tcg_rn);
5903             write_fp_dreg(s, rd, tmp);
5904             tcg_temp_free_i64(tmp);
5905             break;
5906         case 1:
5907             /* 64 bit */
5908             write_fp_dreg(s, rd, tcg_rn);
5909             break;
5910         case 2:
5911             /* 64 bit to top half. */
5912             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5913             clear_vec_high(s, true, rd);
5914             break;
5915         case 3:
5916             /* 16 bit */
5917             tmp = tcg_temp_new_i64();
5918             tcg_gen_ext16u_i64(tmp, tcg_rn);
5919             write_fp_dreg(s, rd, tmp);
5920             tcg_temp_free_i64(tmp);
5921             break;
5922         default:
5923             g_assert_not_reached();
5924         }
5925     } else {
5926         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5927
5928         switch (type) {
5929         case 0:
5930             /* 32 bit */
5931             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5932             break;
5933         case 1:
5934             /* 64 bit */
5935             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5936             break;
5937         case 2:
5938             /* 64 bits from top half */
5939             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5940             break;
5941         case 3:
5942             /* 16 bit */
5943             tcg_gen_ld16u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_16));
5944             break;
5945         default:
5946             g_assert_not_reached();
5947         }
5948     }
5949 }
5950
5951 /* Floating point <-> integer conversions
5952  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5953  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5954  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5955  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5956  */
5957 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5958 {
5959     int rd = extract32(insn, 0, 5);
5960     int rn = extract32(insn, 5, 5);
5961     int opcode = extract32(insn, 16, 3);
5962     int rmode = extract32(insn, 19, 2);
5963     int type = extract32(insn, 22, 2);
5964     bool sbit = extract32(insn, 29, 1);
5965     bool sf = extract32(insn, 31, 1);
5966
5967     if (sbit) {
5968         unallocated_encoding(s);
5969         return;
5970     }
5971
5972     if (opcode > 5) {
5973         /* FMOV */
5974         bool itof = opcode & 1;
5975
5976         if (rmode >= 2) {
5977             unallocated_encoding(s);
5978             return;
5979         }
5980
5981         switch (sf << 3 | type << 1 | rmode) {
5982         case 0x0: /* 32 bit */
5983         case 0xa: /* 64 bit */
5984         case 0xd: /* 64 bit to top half of quad */
5985             break;
5986         case 0x6: /* 16-bit float, 32-bit int */
5987         case 0xe: /* 16-bit float, 64-bit int */
5988             if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5989                 break;
5990             }
5991             /* fallthru */
5992         default:
5993             /* all other sf/type/rmode combinations are invalid */
5994             unallocated_encoding(s);
5995             return;
5996         }
5997
5998         if (!fp_access_check(s)) {
5999             return;
6000         }
6001         handle_fmov(s, rd, rn, type, itof);
6002     } else {
6003         /* actual FP conversions */
6004         bool itof = extract32(opcode, 1, 1);
6005
6006         if (rmode != 0 && opcode > 1) {
6007             unallocated_encoding(s);
6008             return;
6009         }
6010         switch (type) {
6011         case 0: /* float32 */
6012         case 1: /* float64 */
6013             break;
6014         case 3: /* float16 */
6015             if (arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
6016                 break;
6017             }
6018             /* fallthru */
6019         default:
6020             unallocated_encoding(s);
6021             return;
6022         }
6023
6024         if (!fp_access_check(s)) {
6025             return;
6026         }
6027         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
6028     }
6029 }
6030
6031 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
6032  *   31  30  29 28     25 24                          0
6033  * +---+---+---+---------+-----------------------------+
6034  * |   | 0 |   | 1 1 1 1 |                             |
6035  * +---+---+---+---------+-----------------------------+
6036  */
6037 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
6038 {
6039     if (extract32(insn, 24, 1)) {
6040         /* Floating point data-processing (3 source) */
6041         disas_fp_3src(s, insn);
6042     } else if (extract32(insn, 21, 1) == 0) {
6043         /* Floating point to fixed point conversions */
6044         disas_fp_fixed_conv(s, insn);
6045     } else {
6046         switch (extract32(insn, 10, 2)) {
6047         case 1:
6048             /* Floating point conditional compare */
6049             disas_fp_ccomp(s, insn);
6050             break;
6051         case 2:
6052             /* Floating point data-processing (2 source) */
6053             disas_fp_2src(s, insn);
6054             break;
6055         case 3:
6056             /* Floating point conditional select */
6057             disas_fp_csel(s, insn);
6058             break;
6059         case 0:
6060             switch (ctz32(extract32(insn, 12, 4))) {
6061             case 0: /* [15:12] == xxx1 */
6062                 /* Floating point immediate */
6063                 disas_fp_imm(s, insn);
6064                 break;
6065             case 1: /* [15:12] == xx10 */
6066                 /* Floating point compare */
6067                 disas_fp_compare(s, insn);
6068                 break;
6069             case 2: /* [15:12] == x100 */
6070                 /* Floating point data-processing (1 source) */
6071                 disas_fp_1src(s, insn);
6072                 break;
6073             case 3: /* [15:12] == 1000 */
6074                 unallocated_encoding(s);
6075                 break;
6076             default: /* [15:12] == 0000 */
6077                 /* Floating point <-> integer conversions */
6078                 disas_fp_int_conv(s, insn);
6079                 break;
6080             }
6081             break;
6082         }
6083     }
6084 }
6085
6086 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
6087                      int pos)
6088 {
6089     /* Extract 64 bits from the middle of two concatenated 64 bit
6090      * vector register slices left:right. The extracted bits start
6091      * at 'pos' bits into the right (least significant) side.
6092      * We return the result in tcg_right, and guarantee not to
6093      * trash tcg_left.
6094      */
6095     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
6096     assert(pos > 0 && pos < 64);
6097
6098     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
6099     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
6100     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
6101
6102     tcg_temp_free_i64(tcg_tmp);
6103 }
6104
6105 /* EXT
6106  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
6107  * +---+---+-------------+-----+---+------+---+------+---+------+------+
6108  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
6109  * +---+---+-------------+-----+---+------+---+------+---+------+------+
6110  */
6111 static void disas_simd_ext(DisasContext *s, uint32_t insn)
6112 {
6113     int is_q = extract32(insn, 30, 1);
6114     int op2 = extract32(insn, 22, 2);
6115     int imm4 = extract32(insn, 11, 4);
6116     int rm = extract32(insn, 16, 5);
6117     int rn = extract32(insn, 5, 5);
6118     int rd = extract32(insn, 0, 5);
6119     int pos = imm4 << 3;
6120     TCGv_i64 tcg_resl, tcg_resh;
6121
6122     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
6123         unallocated_encoding(s);
6124         return;
6125     }
6126
6127     if (!fp_access_check(s)) {
6128         return;
6129     }
6130
6131     tcg_resh = tcg_temp_new_i64();
6132     tcg_resl = tcg_temp_new_i64();
6133
6134     /* Vd gets bits starting at pos bits into Vm:Vn. This is
6135      * either extracting 128 bits from a 128:128 concatenation, or
6136      * extracting 64 bits from a 64:64 concatenation.
6137      */
6138     if (!is_q) {
6139         read_vec_element(s, tcg_resl, rn, 0, MO_64);
6140         if (pos != 0) {
6141             read_vec_element(s, tcg_resh, rm, 0, MO_64);
6142             do_ext64(s, tcg_resh, tcg_resl, pos);
6143         }
6144         tcg_gen_movi_i64(tcg_resh, 0);
6145     } else {
6146         TCGv_i64 tcg_hh;
6147         typedef struct {
6148             int reg;
6149             int elt;
6150         } EltPosns;
6151         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
6152         EltPosns *elt = eltposns;
6153
6154         if (pos >= 64) {
6155             elt++;
6156             pos -= 64;
6157         }
6158
6159         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
6160         elt++;
6161         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
6162         elt++;
6163         if (pos != 0) {
6164             do_ext64(s, tcg_resh, tcg_resl, pos);
6165             tcg_hh = tcg_temp_new_i64();
6166             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
6167             do_ext64(s, tcg_hh, tcg_resh, pos);
6168             tcg_temp_free_i64(tcg_hh);
6169         }
6170     }
6171
6172     write_vec_element(s, tcg_resl, rd, 0, MO_64);
6173     tcg_temp_free_i64(tcg_resl);
6174     write_vec_element(s, tcg_resh, rd, 1, MO_64);
6175     tcg_temp_free_i64(tcg_resh);
6176 }
6177
6178 /* TBL/TBX
6179  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
6180  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6181  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
6182  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
6183  */
6184 static void disas_simd_tb(DisasContext *s, uint32_t insn)
6185 {
6186     int op2 = extract32(insn, 22, 2);
6187     int is_q = extract32(insn, 30, 1);
6188     int rm = extract32(insn, 16, 5);
6189     int rn = extract32(insn, 5, 5);
6190     int rd = extract32(insn, 0, 5);
6191     int is_tblx = extract32(insn, 12, 1);
6192     int len = extract32(insn, 13, 2);
6193     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
6194     TCGv_i32 tcg_regno, tcg_numregs;
6195
6196     if (op2 != 0) {
6197         unallocated_encoding(s);
6198         return;
6199     }
6200
6201     if (!fp_access_check(s)) {
6202         return;
6203     }
6204
6205     /* This does a table lookup: for every byte element in the input
6206      * we index into a table formed from up to four vector registers,
6207      * and then the output is the result of the lookups. Our helper
6208      * function does the lookup operation for a single 64 bit part of
6209      * the input.
6210      */
6211     tcg_resl = tcg_temp_new_i64();
6212     tcg_resh = tcg_temp_new_i64();
6213
6214     if (is_tblx) {
6215         read_vec_element(s, tcg_resl, rd, 0, MO_64);
6216     } else {
6217         tcg_gen_movi_i64(tcg_resl, 0);
6218     }
6219     if (is_tblx && is_q) {
6220         read_vec_element(s, tcg_resh, rd, 1, MO_64);
6221     } else {
6222         tcg_gen_movi_i64(tcg_resh, 0);
6223     }
6224
6225     tcg_idx = tcg_temp_new_i64();
6226     tcg_regno = tcg_const_i32(rn);
6227     tcg_numregs = tcg_const_i32(len + 1);
6228     read_vec_element(s, tcg_idx, rm, 0, MO_64);
6229     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
6230                         tcg_regno, tcg_numregs);
6231     if (is_q) {
6232         read_vec_element(s, tcg_idx, rm, 1, MO_64);
6233         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
6234                             tcg_regno, tcg_numregs);
6235     }
6236     tcg_temp_free_i64(tcg_idx);
6237     tcg_temp_free_i32(tcg_regno);
6238     tcg_temp_free_i32(tcg_numregs);
6239
6240     write_vec_element(s, tcg_resl, rd, 0, MO_64);
6241     tcg_temp_free_i64(tcg_resl);
6242     write_vec_element(s, tcg_resh, rd, 1, MO_64);
6243     tcg_temp_free_i64(tcg_resh);
6244 }
6245
6246 /* ZIP/UZP/TRN
6247  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
6248  * +---+---+-------------+------+---+------+---+------------------+------+
6249  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
6250  * +---+---+-------------+------+---+------+---+------------------+------+
6251  */
6252 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
6253 {
6254     int rd = extract32(insn, 0, 5);
6255     int rn = extract32(insn, 5, 5);
6256     int rm = extract32(insn, 16, 5);
6257     int size = extract32(insn, 22, 2);
6258     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
6259      * bit 2 indicates 1 vs 2 variant of the insn.
6260      */
6261     int opcode = extract32(insn, 12, 2);
6262     bool part = extract32(insn, 14, 1);
6263     bool is_q = extract32(insn, 30, 1);
6264     int esize = 8 << size;
6265     int i, ofs;
6266     int datasize = is_q ? 128 : 64;
6267     int elements = datasize / esize;
6268     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
6269
6270     if (opcode == 0 || (size == 3 && !is_q)) {
6271         unallocated_encoding(s);
6272         return;
6273     }
6274
6275     if (!fp_access_check(s)) {
6276         return;
6277     }
6278
6279     tcg_resl = tcg_const_i64(0);
6280     tcg_resh = tcg_const_i64(0);
6281     tcg_res = tcg_temp_new_i64();
6282
6283     for (i = 0; i < elements; i++) {
6284         switch (opcode) {
6285         case 1: /* UZP1/2 */
6286         {
6287             int midpoint = elements / 2;
6288             if (i < midpoint) {
6289                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
6290             } else {
6291                 read_vec_element(s, tcg_res, rm,
6292                                  2 * (i - midpoint) + part, size);
6293             }
6294             break;
6295         }
6296         case 2: /* TRN1/2 */
6297             if (i & 1) {
6298                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
6299             } else {
6300                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
6301             }
6302             break;
6303         case 3: /* ZIP1/2 */
6304         {
6305             int base = part * elements / 2;
6306             if (i & 1) {
6307                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
6308             } else {
6309                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
6310             }
6311             break;
6312         }
6313         default:
6314             g_assert_not_reached();
6315         }
6316
6317         ofs = i * esize;
6318         if (ofs < 64) {
6319             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
6320             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
6321         } else {
6322             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
6323             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
6324         }
6325     }
6326
6327     tcg_temp_free_i64(tcg_res);
6328
6329     write_vec_element(s, tcg_resl, rd, 0, MO_64);
6330     tcg_temp_free_i64(tcg_resl);
6331     write_vec_element(s, tcg_resh, rd, 1, MO_64);
6332     tcg_temp_free_i64(tcg_resh);
6333 }
6334
6335 /*
6336  * do_reduction_op helper
6337  *
6338  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
6339  * important for correct NaN propagation that we do these
6340  * operations in exactly the order specified by the pseudocode.
6341  *
6342  * This is a recursive function, TCG temps should be freed by the
6343  * calling function once it is done with the values.
6344  */
6345 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
6346                                 int esize, int size, int vmap, TCGv_ptr fpst)
6347 {
6348     if (esize == size) {
6349         int element;
6350         TCGMemOp msize = esize == 16 ? MO_16 : MO_32;
6351         TCGv_i32 tcg_elem;
6352
6353         /* We should have one register left here */
6354         assert(ctpop8(vmap) == 1);
6355         element = ctz32(vmap);
6356         assert(element < 8);
6357
6358         tcg_elem = tcg_temp_new_i32();
6359         read_vec_element_i32(s, tcg_elem, rn, element, msize);
6360         return tcg_elem;
6361     } else {
6362         int bits = size / 2;
6363         int shift = ctpop8(vmap) / 2;
6364         int vmap_lo = (vmap >> shift) & vmap;
6365         int vmap_hi = (vmap & ~vmap_lo);
6366         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
6367
6368         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
6369         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
6370         tcg_res = tcg_temp_new_i32();
6371
6372         switch (fpopcode) {
6373         case 0x0c: /* fmaxnmv half-precision */
6374             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
6375             break;
6376         case 0x0f: /* fmaxv half-precision */
6377             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
6378             break;
6379         case 0x1c: /* fminnmv half-precision */
6380             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
6381             break;
6382         case 0x1f: /* fminv half-precision */
6383             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
6384             break;
6385         case 0x2c: /* fmaxnmv */
6386             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
6387             break;
6388         case 0x2f: /* fmaxv */
6389             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
6390             break;
6391         case 0x3c: /* fminnmv */
6392             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
6393             break;
6394         case 0x3f: /* fminv */
6395             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
6396             break;
6397         default:
6398             g_assert_not_reached();
6399         }
6400
6401         tcg_temp_free_i32(tcg_hi);
6402         tcg_temp_free_i32(tcg_lo);
6403         return tcg_res;
6404     }
6405 }
6406
6407 /* AdvSIMD across lanes
6408  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6409  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
6410  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6411  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
6412  */
6413 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
6414 {
6415     int rd = extract32(insn, 0, 5);
6416     int rn = extract32(insn, 5, 5);
6417     int size = extract32(insn, 22, 2);
6418     int opcode = extract32(insn, 12, 5);
6419     bool is_q = extract32(insn, 30, 1);
6420     bool is_u = extract32(insn, 29, 1);
6421     bool is_fp = false;
6422     bool is_min = false;
6423     int esize;
6424     int elements;
6425     int i;
6426     TCGv_i64 tcg_res, tcg_elt;
6427
6428     switch (opcode) {
6429     case 0x1b: /* ADDV */
6430         if (is_u) {
6431             unallocated_encoding(s);
6432             return;
6433         }
6434         /* fall through */
6435     case 0x3: /* SADDLV, UADDLV */
6436     case 0xa: /* SMAXV, UMAXV */
6437     case 0x1a: /* SMINV, UMINV */
6438         if (size == 3 || (size == 2 && !is_q)) {
6439             unallocated_encoding(s);
6440             return;
6441         }
6442         break;
6443     case 0xc: /* FMAXNMV, FMINNMV */
6444     case 0xf: /* FMAXV, FMINV */
6445         /* Bit 1 of size field encodes min vs max and the actual size
6446          * depends on the encoding of the U bit. If not set (and FP16
6447          * enabled) then we do half-precision float instead of single
6448          * precision.
6449          */
6450         is_min = extract32(size, 1, 1);
6451         is_fp = true;
6452         if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
6453             size = 1;
6454         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
6455             unallocated_encoding(s);
6456             return;
6457         } else {
6458             size = 2;
6459         }
6460         break;
6461     default:
6462         unallocated_encoding(s);
6463         return;
6464     }
6465
6466     if (!fp_access_check(s)) {
6467         return;
6468     }
6469
6470     esize = 8 << size;
6471     elements = (is_q ? 128 : 64) / esize;
6472
6473     tcg_res = tcg_temp_new_i64();
6474     tcg_elt = tcg_temp_new_i64();
6475
6476     /* These instructions operate across all lanes of a vector
6477      * to produce a single result. We can guarantee that a 64
6478      * bit intermediate is sufficient:
6479      *  + for [US]ADDLV the maximum element size is 32 bits, and
6480      *    the result type is 64 bits
6481      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
6482      *    same as the element size, which is 32 bits at most
6483      * For the integer operations we can choose to work at 64
6484      * or 32 bits and truncate at the end; for simplicity
6485      * we use 64 bits always. The floating point
6486      * ops do require 32 bit intermediates, though.
6487      */
6488     if (!is_fp) {
6489         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
6490
6491         for (i = 1; i < elements; i++) {
6492             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
6493
6494             switch (opcode) {
6495             case 0x03: /* SADDLV / UADDLV */
6496             case 0x1b: /* ADDV */
6497                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
6498                 break;
6499             case 0x0a: /* SMAXV / UMAXV */
6500                 if (is_u) {
6501                     tcg_gen_umax_i64(tcg_res, tcg_res, tcg_elt);
6502                 } else {
6503                     tcg_gen_smax_i64(tcg_res, tcg_res, tcg_elt);
6504                 }
6505                 break;
6506             case 0x1a: /* SMINV / UMINV */
6507                 if (is_u) {
6508                     tcg_gen_umin_i64(tcg_res, tcg_res, tcg_elt);
6509                 } else {
6510                     tcg_gen_smin_i64(tcg_res, tcg_res, tcg_elt);
6511                 }
6512                 break;
6513             default:
6514                 g_assert_not_reached();
6515             }
6516
6517         }
6518     } else {
6519         /* Floating point vector reduction ops which work across 32
6520          * bit (single) or 16 bit (half-precision) intermediates.
6521          * Note that correct NaN propagation requires that we do these
6522          * operations in exactly the order specified by the pseudocode.
6523          */
6524         TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
6525         int fpopcode = opcode | is_min << 4 | is_u << 5;
6526         int vmap = (1 << elements) - 1;
6527         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
6528                                              (is_q ? 128 : 64), vmap, fpst);
6529         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
6530         tcg_temp_free_i32(tcg_res32);
6531         tcg_temp_free_ptr(fpst);
6532     }
6533
6534     tcg_temp_free_i64(tcg_elt);
6535
6536     /* Now truncate the result to the width required for the final output */
6537     if (opcode == 0x03) {
6538         /* SADDLV, UADDLV: result is 2*esize */
6539         size++;
6540     }
6541
6542     switch (size) {
6543     case 0:
6544         tcg_gen_ext8u_i64(tcg_res, tcg_res);
6545         break;
6546     case 1:
6547         tcg_gen_ext16u_i64(tcg_res, tcg_res);
6548         break;
6549     case 2:
6550         tcg_gen_ext32u_i64(tcg_res, tcg_res);
6551         break;
6552     case 3:
6553         break;
6554     default:
6555         g_assert_not_reached();
6556     }
6557
6558     write_fp_dreg(s, rd, tcg_res);
6559     tcg_temp_free_i64(tcg_res);
6560 }
6561
6562 /* DUP (Element, Vector)
6563  *
6564  *  31  30   29              21 20    16 15        10  9    5 4    0
6565  * +---+---+-------------------+--------+-------------+------+------+
6566  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
6567  * +---+---+-------------------+--------+-------------+------+------+
6568  *
6569  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6570  */
6571 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
6572                              int imm5)
6573 {
6574     int size = ctz32(imm5);
6575     int index = imm5 >> (size + 1);
6576
6577     if (size > 3 || (size == 3 && !is_q)) {
6578         unallocated_encoding(s);
6579         return;
6580     }
6581
6582     if (!fp_access_check(s)) {
6583         return;
6584     }
6585
6586     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
6587                          vec_reg_offset(s, rn, index, size),
6588                          is_q ? 16 : 8, vec_full_reg_size(s));
6589 }
6590
6591 /* DUP (element, scalar)
6592  *  31                   21 20    16 15        10  9    5 4    0
6593  * +-----------------------+--------+-------------+------+------+
6594  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
6595  * +-----------------------+--------+-------------+------+------+
6596  */
6597 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
6598                               int imm5)
6599 {
6600     int size = ctz32(imm5);
6601     int index;
6602     TCGv_i64 tmp;
6603
6604     if (size > 3) {
6605         unallocated_encoding(s);
6606         return;
6607     }
6608
6609     if (!fp_access_check(s)) {
6610         return;
6611     }
6612
6613     index = imm5 >> (size + 1);
6614
6615     /* This instruction just extracts the specified element and
6616      * zero-extends it into the bottom of the destination register.
6617      */
6618     tmp = tcg_temp_new_i64();
6619     read_vec_element(s, tmp, rn, index, size);
6620     write_fp_dreg(s, rd, tmp);
6621     tcg_temp_free_i64(tmp);
6622 }
6623
6624 /* DUP (General)
6625  *
6626  *  31  30   29              21 20    16 15        10  9    5 4    0
6627  * +---+---+-------------------+--------+-------------+------+------+
6628  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
6629  * +---+---+-------------------+--------+-------------+------+------+
6630  *
6631  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6632  */
6633 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
6634                              int imm5)
6635 {
6636     int size = ctz32(imm5);
6637     uint32_t dofs, oprsz, maxsz;
6638
6639     if (size > 3 || ((size == 3) && !is_q)) {
6640         unallocated_encoding(s);
6641         return;
6642     }
6643
6644     if (!fp_access_check(s)) {
6645         return;
6646     }
6647
6648     dofs = vec_full_reg_offset(s, rd);
6649     oprsz = is_q ? 16 : 8;
6650     maxsz = vec_full_reg_size(s);
6651
6652     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
6653 }
6654
6655 /* INS (Element)
6656  *
6657  *  31                   21 20    16 15  14    11  10 9    5 4    0
6658  * +-----------------------+--------+------------+---+------+------+
6659  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
6660  * +-----------------------+--------+------------+---+------+------+
6661  *
6662  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6663  * index: encoded in imm5<4:size+1>
6664  */
6665 static void handle_simd_inse(DisasContext *s, int rd, int rn,
6666                              int imm4, int imm5)
6667 {
6668     int size = ctz32(imm5);
6669     int src_index, dst_index;
6670     TCGv_i64 tmp;
6671
6672     if (size > 3) {
6673         unallocated_encoding(s);
6674         return;
6675     }
6676
6677     if (!fp_access_check(s)) {
6678         return;
6679     }
6680
6681     dst_index = extract32(imm5, 1+size, 5);
6682     src_index = extract32(imm4, size, 4);
6683
6684     tmp = tcg_temp_new_i64();
6685
6686     read_vec_element(s, tmp, rn, src_index, size);
6687     write_vec_element(s, tmp, rd, dst_index, size);
6688
6689     tcg_temp_free_i64(tmp);
6690 }
6691
6692
6693 /* INS (General)
6694  *
6695  *  31                   21 20    16 15        10  9    5 4    0
6696  * +-----------------------+--------+-------------+------+------+
6697  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
6698  * +-----------------------+--------+-------------+------+------+
6699  *
6700  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6701  * index: encoded in imm5<4:size+1>
6702  */
6703 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
6704 {
6705     int size = ctz32(imm5);
6706     int idx;
6707
6708     if (size > 3) {
6709         unallocated_encoding(s);
6710         return;
6711     }
6712
6713     if (!fp_access_check(s)) {
6714         return;
6715     }
6716
6717     idx = extract32(imm5, 1 + size, 4 - size);
6718     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
6719 }
6720
6721 /*
6722  * UMOV (General)
6723  * SMOV (General)
6724  *
6725  *  31  30   29              21 20    16 15    12   10 9    5 4    0
6726  * +---+---+-------------------+--------+-------------+------+------+
6727  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
6728  * +---+---+-------------------+--------+-------------+------+------+
6729  *
6730  * U: unsigned when set
6731  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6732  */
6733 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
6734                                   int rn, int rd, int imm5)
6735 {
6736     int size = ctz32(imm5);
6737     int element;
6738     TCGv_i64 tcg_rd;
6739
6740     /* Check for UnallocatedEncodings */
6741     if (is_signed) {
6742         if (size > 2 || (size == 2 && !is_q)) {
6743             unallocated_encoding(s);
6744             return;
6745         }
6746     } else {
6747         if (size > 3
6748             || (size < 3 && is_q)
6749             || (size == 3 && !is_q)) {
6750             unallocated_encoding(s);
6751             return;
6752         }
6753     }
6754
6755     if (!fp_access_check(s)) {
6756         return;
6757     }
6758
6759     element = extract32(imm5, 1+size, 4);
6760
6761     tcg_rd = cpu_reg(s, rd);
6762     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6763     if (is_signed && !is_q) {
6764         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6765     }
6766 }
6767
6768 /* AdvSIMD copy
6769  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6770  * +---+---+----+-----------------+------+---+------+---+------+------+
6771  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6772  * +---+---+----+-----------------+------+---+------+---+------+------+
6773  */
6774 static void disas_simd_copy(DisasContext *s, uint32_t insn)
6775 {
6776     int rd = extract32(insn, 0, 5);
6777     int rn = extract32(insn, 5, 5);
6778     int imm4 = extract32(insn, 11, 4);
6779     int op = extract32(insn, 29, 1);
6780     int is_q = extract32(insn, 30, 1);
6781     int imm5 = extract32(insn, 16, 5);
6782
6783     if (op) {
6784         if (is_q) {
6785             /* INS (element) */
6786             handle_simd_inse(s, rd, rn, imm4, imm5);
6787         } else {
6788             unallocated_encoding(s);
6789         }
6790     } else {
6791         switch (imm4) {
6792         case 0:
6793             /* DUP (element - vector) */
6794             handle_simd_dupe(s, is_q, rd, rn, imm5);
6795             break;
6796         case 1:
6797             /* DUP (general) */
6798             handle_simd_dupg(s, is_q, rd, rn, imm5);
6799             break;
6800         case 3:
6801             if (is_q) {
6802                 /* INS (general) */
6803                 handle_simd_insg(s, rd, rn, imm5);
6804             } else {
6805                 unallocated_encoding(s);
6806             }
6807             break;
6808         case 5:
6809         case 7:
6810             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6811             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6812             break;
6813         default:
6814             unallocated_encoding(s);
6815             break;
6816         }
6817     }
6818 }
6819
6820 /* AdvSIMD modified immediate
6821  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6822  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6823  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6824  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6825  *
6826  * There are a number of operations that can be carried out here:
6827  *   MOVI - move (shifted) imm into register
6828  *   MVNI - move inverted (shifted) imm into register
6829  *   ORR  - bitwise OR of (shifted) imm with register
6830  *   BIC  - bitwise clear of (shifted) imm with register
6831  * With ARMv8.2 we also have:
6832  *   FMOV half-precision
6833  */
6834 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6835 {
6836     int rd = extract32(insn, 0, 5);
6837     int cmode = extract32(insn, 12, 4);
6838     int cmode_3_1 = extract32(cmode, 1, 3);
6839     int cmode_0 = extract32(cmode, 0, 1);
6840     int o2 = extract32(insn, 11, 1);
6841     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6842     bool is_neg = extract32(insn, 29, 1);
6843     bool is_q = extract32(insn, 30, 1);
6844     uint64_t imm = 0;
6845
6846     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6847         /* Check for FMOV (vector, immediate) - half-precision */
6848         if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) {
6849             unallocated_encoding(s);
6850             return;
6851         }
6852     }
6853
6854     if (!fp_access_check(s)) {
6855         return;
6856     }
6857
6858     /* See AdvSIMDExpandImm() in ARM ARM */
6859     switch (cmode_3_1) {
6860     case 0: /* Replicate(Zeros(24):imm8, 2) */
6861     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6862     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6863     case 3: /* Replicate(imm8:Zeros(24), 2) */
6864     {
6865         int shift = cmode_3_1 * 8;
6866         imm = bitfield_replicate(abcdefgh << shift, 32);
6867         break;
6868     }
6869     case 4: /* Replicate(Zeros(8):imm8, 4) */
6870     case 5: /* Replicate(imm8:Zeros(8), 4) */
6871     {
6872         int shift = (cmode_3_1 & 0x1) * 8;
6873         imm = bitfield_replicate(abcdefgh << shift, 16);
6874         break;
6875     }
6876     case 6:
6877         if (cmode_0) {
6878             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6879             imm = (abcdefgh << 16) | 0xffff;
6880         } else {
6881             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6882             imm = (abcdefgh << 8) | 0xff;
6883         }
6884         imm = bitfield_replicate(imm, 32);
6885         break;
6886     case 7:
6887         if (!cmode_0 && !is_neg) {
6888             imm = bitfield_replicate(abcdefgh, 8);
6889         } else if (!cmode_0 && is_neg) {
6890             int i;
6891             imm = 0;
6892             for (i = 0; i < 8; i++) {
6893                 if ((abcdefgh) & (1 << i)) {
6894                     imm |= 0xffULL << (i * 8);
6895                 }
6896             }
6897         } else if (cmode_0) {
6898             if (is_neg) {
6899                 imm = (abcdefgh & 0x3f) << 48;
6900                 if (abcdefgh & 0x80) {
6901                     imm |= 0x8000000000000000ULL;
6902                 }
6903                 if (abcdefgh & 0x40) {
6904                     imm |= 0x3fc0000000000000ULL;
6905                 } else {
6906                     imm |= 0x4000000000000000ULL;
6907                 }
6908             } else {
6909                 if (o2) {
6910                     /* FMOV (vector, immediate) - half-precision */
6911                     imm = vfp_expand_imm(MO_16, abcdefgh);
6912                     /* now duplicate across the lanes */
6913                     imm = bitfield_replicate(imm, 16);
6914                 } else {
6915                     imm = (abcdefgh & 0x3f) << 19;
6916                     if (abcdefgh & 0x80) {
6917                         imm |= 0x80000000;
6918                     }
6919                     if (abcdefgh & 0x40) {
6920                         imm |= 0x3e000000;
6921                     } else {
6922                         imm |= 0x40000000;
6923                     }
6924                     imm |= (imm << 32);
6925                 }
6926             }
6927         }
6928         break;
6929     default:
6930         fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
6931         g_assert_not_reached();
6932     }
6933
6934     if (cmode_3_1 != 7 && is_neg) {
6935         imm = ~imm;
6936     }
6937
6938     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
6939         /* MOVI or MVNI, with MVNI negation handled above.  */
6940         tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
6941                             vec_full_reg_size(s), imm);
6942     } else {
6943         /* ORR or BIC, with BIC negation to AND handled above.  */
6944         if (is_neg) {
6945             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
6946         } else {
6947             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
6948         }
6949     }
6950 }
6951
6952 /* AdvSIMD scalar copy
6953  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6954  * +-----+----+-----------------+------+---+------+---+------+------+
6955  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6956  * +-----+----+-----------------+------+---+------+---+------+------+
6957  */
6958 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6959 {
6960     int rd = extract32(insn, 0, 5);
6961     int rn = extract32(insn, 5, 5);
6962     int imm4 = extract32(insn, 11, 4);
6963     int imm5 = extract32(insn, 16, 5);
6964     int op = extract32(insn, 29, 1);
6965
6966     if (op != 0 || imm4 != 0) {
6967         unallocated_encoding(s);
6968         return;
6969     }
6970
6971     /* DUP (element, scalar) */
6972     handle_simd_dupes(s, rd, rn, imm5);
6973 }
6974
6975 /* AdvSIMD scalar pairwise
6976  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6977  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6978  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6979  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6980  */
6981 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6982 {
6983     int u = extract32(insn, 29, 1);
6984     int size = extract32(insn, 22, 2);
6985     int opcode = extract32(insn, 12, 5);
6986     int rn = extract32(insn, 5, 5);
6987     int rd = extract32(insn, 0, 5);
6988     TCGv_ptr fpst;
6989
6990     /* For some ops (the FP ones), size[1] is part of the encoding.
6991      * For ADDP strictly it is not but size[1] is always 1 for valid
6992      * encodings.
6993      */
6994     opcode |= (extract32(size, 1, 1) << 5);
6995
6996     switch (opcode) {
6997     case 0x3b: /* ADDP */
6998         if (u || size != 3) {
6999             unallocated_encoding(s);
7000             return;
7001         }
7002         if (!fp_access_check(s)) {
7003             return;
7004         }
7005
7006         fpst = NULL;
7007         break;
7008     case 0xc: /* FMAXNMP */
7009     case 0xd: /* FADDP */
7010     case 0xf: /* FMAXP */
7011     case 0x2c: /* FMINNMP */
7012     case 0x2f: /* FMINP */
7013         /* FP op, size[0] is 32 or 64 bit*/
7014         if (!u) {
7015             if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
7016                 unallocated_encoding(s);
7017                 return;
7018             } else {
7019                 size = MO_16;
7020             }
7021         } else {
7022             size = extract32(size, 0, 1) ? MO_64 : MO_32;
7023         }
7024
7025         if (!fp_access_check(s)) {
7026             return;
7027         }
7028
7029         fpst = get_fpstatus_ptr(size == MO_16);
7030         break;
7031     default:
7032         unallocated_encoding(s);
7033         return;
7034     }
7035
7036     if (size == MO_64) {
7037         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7038         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7039         TCGv_i64 tcg_res = tcg_temp_new_i64();
7040
7041         read_vec_element(s, tcg_op1, rn, 0, MO_64);
7042         read_vec_element(s, tcg_op2, rn, 1, MO_64);
7043
7044         switch (opcode) {
7045         case 0x3b: /* ADDP */
7046             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
7047             break;
7048         case 0xc: /* FMAXNMP */
7049             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7050             break;
7051         case 0xd: /* FADDP */
7052             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
7053             break;
7054         case 0xf: /* FMAXP */
7055             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
7056             break;
7057         case 0x2c: /* FMINNMP */
7058             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
7059             break;
7060         case 0x2f: /* FMINP */
7061             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
7062             break;
7063         default:
7064             g_assert_not_reached();
7065         }
7066
7067         write_fp_dreg(s, rd, tcg_res);
7068
7069         tcg_temp_free_i64(tcg_op1);
7070         tcg_temp_free_i64(tcg_op2);
7071         tcg_temp_free_i64(tcg_res);
7072     } else {
7073         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7074         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7075         TCGv_i32 tcg_res = tcg_temp_new_i32();
7076
7077         read_vec_element_i32(s, tcg_op1, rn, 0, size);
7078         read_vec_element_i32(s, tcg_op2, rn, 1, size);
7079
7080         if (size == MO_16) {
7081             switch (opcode) {
7082             case 0xc: /* FMAXNMP */
7083                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7084                 break;
7085             case 0xd: /* FADDP */
7086                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
7087                 break;
7088             case 0xf: /* FMAXP */
7089                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
7090                 break;
7091             case 0x2c: /* FMINNMP */
7092                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
7093                 break;
7094             case 0x2f: /* FMINP */
7095                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
7096                 break;
7097             default:
7098                 g_assert_not_reached();
7099             }
7100         } else {
7101             switch (opcode) {
7102             case 0xc: /* FMAXNMP */
7103                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
7104                 break;
7105             case 0xd: /* FADDP */
7106                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
7107                 break;
7108             case 0xf: /* FMAXP */
7109                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
7110                 break;
7111             case 0x2c: /* FMINNMP */
7112                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
7113                 break;
7114             case 0x2f: /* FMINP */
7115                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
7116                 break;
7117             default:
7118                 g_assert_not_reached();
7119             }
7120         }
7121
7122         write_fp_sreg(s, rd, tcg_res);
7123
7124         tcg_temp_free_i32(tcg_op1);
7125         tcg_temp_free_i32(tcg_op2);
7126         tcg_temp_free_i32(tcg_res);
7127     }
7128
7129     if (fpst) {
7130         tcg_temp_free_ptr(fpst);
7131     }
7132 }
7133
7134 /*
7135  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
7136  *
7137  * This code is handles the common shifting code and is used by both
7138  * the vector and scalar code.
7139  */
7140 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
7141                                     TCGv_i64 tcg_rnd, bool accumulate,
7142                                     bool is_u, int size, int shift)
7143 {
7144     bool extended_result = false;
7145     bool round = tcg_rnd != NULL;
7146     int ext_lshift = 0;
7147     TCGv_i64 tcg_src_hi;
7148
7149     if (round && size == 3) {
7150         extended_result = true;
7151         ext_lshift = 64 - shift;
7152         tcg_src_hi = tcg_temp_new_i64();
7153     } else if (shift == 64) {
7154         if (!accumulate && is_u) {
7155             /* result is zero */
7156             tcg_gen_movi_i64(tcg_res, 0);
7157             return;
7158         }
7159     }
7160
7161     /* Deal with the rounding step */
7162     if (round) {
7163         if (extended_result) {
7164             TCGv_i64 tcg_zero = tcg_const_i64(0);
7165             if (!is_u) {
7166                 /* take care of sign extending tcg_res */
7167                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
7168                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7169                                  tcg_src, tcg_src_hi,
7170                                  tcg_rnd, tcg_zero);
7171             } else {
7172                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
7173                                  tcg_src, tcg_zero,
7174                                  tcg_rnd, tcg_zero);
7175             }
7176             tcg_temp_free_i64(tcg_zero);
7177         } else {
7178             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
7179         }
7180     }
7181
7182     /* Now do the shift right */
7183     if (round && extended_result) {
7184         /* extended case, >64 bit precision required */
7185         if (ext_lshift == 0) {
7186             /* special case, only high bits matter */
7187             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
7188         } else {
7189             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7190             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
7191             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
7192         }
7193     } else {
7194         if (is_u) {
7195             if (shift == 64) {
7196                 /* essentially shifting in 64 zeros */
7197                 tcg_gen_movi_i64(tcg_src, 0);
7198             } else {
7199                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
7200             }
7201         } else {
7202             if (shift == 64) {
7203                 /* effectively extending the sign-bit */
7204                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
7205             } else {
7206                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
7207             }
7208         }
7209     }
7210
7211     if (accumulate) {
7212         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
7213     } else {
7214         tcg_gen_mov_i64(tcg_res, tcg_src);
7215     }
7216
7217     if (extended_result) {
7218         tcg_temp_free_i64(tcg_src_hi);
7219     }
7220 }
7221
7222 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
7223 static void handle_scalar_simd_shri(DisasContext *s,
7224                                     bool is_u, int immh, int immb,
7225                                     int opcode, int rn, int rd)
7226 {
7227     const int size = 3;
7228     int immhb = immh << 3 | immb;
7229     int shift = 2 * (8 << size) - immhb;
7230     bool accumulate = false;
7231     bool round = false;
7232     bool insert = false;
7233     TCGv_i64 tcg_rn;
7234     TCGv_i64 tcg_rd;
7235     TCGv_i64 tcg_round;
7236
7237     if (!extract32(immh, 3, 1)) {
7238         unallocated_encoding(s);
7239         return;
7240     }
7241
7242     if (!fp_access_check(s)) {
7243         return;
7244     }
7245
7246     switch (opcode) {
7247     case 0x02: /* SSRA / USRA (accumulate) */
7248         accumulate = true;
7249         break;
7250     case 0x04: /* SRSHR / URSHR (rounding) */
7251         round = true;
7252         break;
7253     case 0x06: /* SRSRA / URSRA (accum + rounding) */
7254         accumulate = round = true;
7255         break;
7256     case 0x08: /* SRI */
7257         insert = true;
7258         break;
7259     }
7260
7261     if (round) {
7262         uint64_t round_const = 1ULL << (shift - 1);
7263         tcg_round = tcg_const_i64(round_const);
7264     } else {
7265         tcg_round = NULL;
7266     }
7267
7268     tcg_rn = read_fp_dreg(s, rn);
7269     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7270
7271     if (insert) {
7272         /* shift count same as element size is valid but does nothing;
7273          * special case to avoid potential shift by 64.
7274          */
7275         int esize = 8 << size;
7276         if (shift != esize) {
7277             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
7278             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
7279         }
7280     } else {
7281         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
7282                                 accumulate, is_u, size, shift);
7283     }
7284
7285     write_fp_dreg(s, rd, tcg_rd);
7286
7287     tcg_temp_free_i64(tcg_rn);
7288     tcg_temp_free_i64(tcg_rd);
7289     if (round) {
7290         tcg_temp_free_i64(tcg_round);
7291     }
7292 }
7293
7294 /* SHL/SLI - Scalar shift left */
7295 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
7296                                     int immh, int immb, int opcode,
7297                                     int rn, int rd)
7298 {
7299     int size = 32 - clz32(immh) - 1;
7300     int immhb = immh << 3 | immb;
7301     int shift = immhb - (8 << size);
7302     TCGv_i64 tcg_rn = new_tmp_a64(s);
7303     TCGv_i64 tcg_rd = new_tmp_a64(s);
7304
7305     if (!extract32(immh, 3, 1)) {
7306         unallocated_encoding(s);
7307         return;
7308     }
7309
7310     if (!fp_access_check(s)) {
7311         return;
7312     }
7313
7314     tcg_rn = read_fp_dreg(s, rn);
7315     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
7316
7317     if (insert) {
7318         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
7319     } else {
7320         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
7321     }
7322
7323     write_fp_dreg(s, rd, tcg_rd);
7324
7325     tcg_temp_free_i64(tcg_rn);
7326     tcg_temp_free_i64(tcg_rd);
7327 }
7328
7329 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
7330  * (signed/unsigned) narrowing */
7331 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
7332                                    bool is_u_shift, bool is_u_narrow,
7333                                    int immh, int immb, int opcode,
7334                                    int rn, int rd)
7335 {
7336     int immhb = immh << 3 | immb;
7337     int size = 32 - clz32(immh) - 1;
7338     int esize = 8 << size;
7339     int shift = (2 * esize) - immhb;
7340     int elements = is_scalar ? 1 : (64 / esize);
7341     bool round = extract32(opcode, 0, 1);
7342     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
7343     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
7344     TCGv_i32 tcg_rd_narrowed;
7345     TCGv_i64 tcg_final;
7346
7347     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
7348         { gen_helper_neon_narrow_sat_s8,
7349           gen_helper_neon_unarrow_sat8 },
7350         { gen_helper_neon_narrow_sat_s16,
7351           gen_helper_neon_unarrow_sat16 },
7352         { gen_helper_neon_narrow_sat_s32,
7353           gen_helper_neon_unarrow_sat32 },
7354         { NULL, NULL },
7355     };
7356     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
7357         gen_helper_neon_narrow_sat_u8,
7358         gen_helper_neon_narrow_sat_u16,
7359         gen_helper_neon_narrow_sat_u32,
7360         NULL
7361     };
7362     NeonGenNarrowEnvFn *narrowfn;
7363
7364     int i;
7365
7366     assert(size < 4);
7367
7368     if (extract32(immh, 3, 1)) {
7369         unallocated_encoding(s);
7370         return;
7371     }
7372
7373     if (!fp_access_check(s)) {
7374         return;
7375     }
7376
7377     if (is_u_shift) {
7378         narrowfn = unsigned_narrow_fns[size];
7379     } else {
7380         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
7381     }
7382
7383     tcg_rn = tcg_temp_new_i64();
7384     tcg_rd = tcg_temp_new_i64();
7385     tcg_rd_narrowed = tcg_temp_new_i32();
7386     tcg_final = tcg_const_i64(0);
7387
7388     if (round) {
7389         uint64_t round_const = 1ULL << (shift - 1);
7390         tcg_round = tcg_const_i64(round_const);
7391     } else {
7392         tcg_round = NULL;
7393     }
7394
7395     for (i = 0; i < elements; i++) {
7396         read_vec_element(s, tcg_rn, rn, i, ldop);
7397         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
7398                                 false, is_u_shift, size+1, shift);
7399         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
7400         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
7401         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
7402     }
7403
7404     if (!is_q) {
7405         write_vec_element(s, tcg_final, rd, 0, MO_64);
7406     } else {
7407         write_vec_element(s, tcg_final, rd, 1, MO_64);
7408     }
7409
7410     if (round) {
7411         tcg_temp_free_i64(tcg_round);
7412     }
7413     tcg_temp_free_i64(tcg_rn);
7414     tcg_temp_free_i64(tcg_rd);
7415     tcg_temp_free_i32(tcg_rd_narrowed);
7416     tcg_temp_free_i64(tcg_final);
7417
7418     clear_vec_high(s, is_q, rd);
7419 }
7420
7421 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
7422 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
7423                              bool src_unsigned, bool dst_unsigned,
7424                              int immh, int immb, int rn, int rd)
7425 {
7426     int immhb = immh << 3 | immb;
7427     int size = 32 - clz32(immh) - 1;
7428     int shift = immhb - (8 << size);
7429     int pass;
7430
7431     assert(immh != 0);
7432     assert(!(scalar && is_q));
7433
7434     if (!scalar) {
7435         if (!is_q && extract32(immh, 3, 1)) {
7436             unallocated_encoding(s);
7437             return;
7438         }
7439
7440         /* Since we use the variable-shift helpers we must
7441          * replicate the shift count into each element of
7442          * the tcg_shift value.
7443          */
7444         switch (size) {
7445         case 0:
7446             shift |= shift << 8;
7447             /* fall through */
7448         case 1:
7449             shift |= shift << 16;
7450             break;
7451         case 2:
7452         case 3:
7453             break;
7454         default:
7455             g_assert_not_reached();
7456         }
7457     }
7458
7459     if (!fp_access_check(s)) {
7460         return;
7461     }
7462
7463     if (size == 3) {
7464         TCGv_i64 tcg_shift = tcg_const_i64(shift);
7465         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
7466             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
7467             { NULL, gen_helper_neon_qshl_u64 },
7468         };
7469         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
7470         int maxpass = is_q ? 2 : 1;
7471
7472         for (pass = 0; pass < maxpass; pass++) {
7473             TCGv_i64 tcg_op = tcg_temp_new_i64();
7474
7475             read_vec_element(s, tcg_op, rn, pass, MO_64);
7476             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
7477             write_vec_element(s, tcg_op, rd, pass, MO_64);
7478
7479             tcg_temp_free_i64(tcg_op);
7480         }
7481         tcg_temp_free_i64(tcg_shift);
7482         clear_vec_high(s, is_q, rd);
7483     } else {
7484         TCGv_i32 tcg_shift = tcg_const_i32(shift);
7485         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
7486             {
7487                 { gen_helper_neon_qshl_s8,
7488                   gen_helper_neon_qshl_s16,
7489                   gen_helper_neon_qshl_s32 },
7490                 { gen_helper_neon_qshlu_s8,
7491                   gen_helper_neon_qshlu_s16,
7492                   gen_helper_neon_qshlu_s32 }
7493             }, {
7494                 { NULL, NULL, NULL },
7495                 { gen_helper_neon_qshl_u8,
7496                   gen_helper_neon_qshl_u16,
7497                   gen_helper_neon_qshl_u32 }
7498             }
7499         };
7500         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
7501         TCGMemOp memop = scalar ? size : MO_32;
7502         int maxpass = scalar ? 1 : is_q ? 4 : 2;
7503
7504         for (pass = 0; pass < maxpass; pass++) {
7505             TCGv_i32 tcg_op = tcg_temp_new_i32();
7506
7507             read_vec_element_i32(s, tcg_op, rn, pass, memop);
7508             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
7509             if (scalar) {
7510                 switch (size) {
7511                 case 0:
7512                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
7513                     break;
7514                 case 1:
7515                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
7516                     break;
7517                 case 2:
7518                     break;
7519                 default:
7520                     g_assert_not_reached();
7521                 }
7522                 write_fp_sreg(s, rd, tcg_op);
7523             } else {
7524                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
7525             }
7526
7527             tcg_temp_free_i32(tcg_op);
7528         }
7529         tcg_temp_free_i32(tcg_shift);
7530
7531         if (!scalar) {
7532             clear_vec_high(s, is_q, rd);
7533         }
7534     }
7535 }
7536
7537 /* Common vector code for handling integer to FP conversion */
7538 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
7539                                    int elements, int is_signed,
7540                                    int fracbits, int size)
7541 {
7542     TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
7543     TCGv_i32 tcg_shift = NULL;
7544
7545     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
7546     int pass;
7547
7548     if (fracbits || size == MO_64) {
7549         tcg_shift = tcg_const_i32(fracbits);
7550     }
7551
7552     if (size == MO_64) {
7553         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
7554         TCGv_i64 tcg_double = tcg_temp_new_i64();
7555
7556         for (pass = 0; pass < elements; pass++) {
7557             read_vec_element(s, tcg_int64, rn, pass, mop);
7558
7559             if (is_signed) {
7560                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
7561                                      tcg_shift, tcg_fpst);
7562             } else {
7563                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
7564                                      tcg_shift, tcg_fpst);
7565             }
7566             if (elements == 1) {
7567                 write_fp_dreg(s, rd, tcg_double);
7568             } else {
7569                 write_vec_element(s, tcg_double, rd, pass, MO_64);
7570             }
7571         }
7572
7573         tcg_temp_free_i64(tcg_int64);
7574         tcg_temp_free_i64(tcg_double);
7575
7576     } else {
7577         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
7578         TCGv_i32 tcg_float = tcg_temp_new_i32();
7579
7580         for (pass = 0; pass < elements; pass++) {
7581             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
7582
7583             switch (size) {
7584             case MO_32:
7585                 if (fracbits) {
7586                     if (is_signed) {
7587                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
7588                                              tcg_shift, tcg_fpst);
7589                     } else {
7590                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
7591                                              tcg_shift, tcg_fpst);
7592                     }
7593                 } else {
7594                     if (is_signed) {
7595                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
7596                     } else {
7597                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
7598                     }
7599                 }
7600                 break;
7601             case MO_16:
7602                 if (fracbits) {
7603                     if (is_signed) {
7604                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
7605                                              tcg_shift, tcg_fpst);
7606                     } else {
7607                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
7608                                              tcg_shift, tcg_fpst);
7609                     }
7610                 } else {
7611                     if (is_signed) {
7612                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
7613                     } else {
7614                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
7615                     }
7616                 }
7617                 break;
7618             default:
7619                 g_assert_not_reached();
7620             }
7621
7622             if (elements == 1) {
7623                 write_fp_sreg(s, rd, tcg_float);
7624             } else {
7625                 write_vec_element_i32(s, tcg_float, rd, pass, size);
7626             }
7627         }
7628
7629         tcg_temp_free_i32(tcg_int32);
7630         tcg_temp_free_i32(tcg_float);
7631     }
7632
7633     tcg_temp_free_ptr(tcg_fpst);
7634     if (tcg_shift) {
7635         tcg_temp_free_i32(tcg_shift);
7636     }
7637
7638     clear_vec_high(s, elements << size == 16, rd);
7639 }
7640
7641 /* UCVTF/SCVTF - Integer to FP conversion */
7642 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
7643                                          bool is_q, bool is_u,
7644                                          int immh, int immb, int opcode,
7645                                          int rn, int rd)
7646 {
7647     int size, elements, fracbits;
7648     int immhb = immh << 3 | immb;
7649
7650     if (immh & 8) {
7651         size = MO_64;
7652         if (!is_scalar && !is_q) {
7653             unallocated_encoding(s);
7654             return;
7655         }
7656     } else if (immh & 4) {
7657         size = MO_32;
7658     } else if (immh & 2) {
7659         size = MO_16;
7660         if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
7661             unallocated_encoding(s);
7662             return;
7663         }
7664     } else {
7665         /* immh == 0 would be a failure of the decode logic */
7666         g_assert(immh == 1);
7667         unallocated_encoding(s);
7668         return;
7669     }
7670
7671     if (is_scalar) {
7672         elements = 1;
7673     } else {
7674         elements = (8 << is_q) >> size;
7675     }
7676     fracbits = (16 << size) - immhb;
7677
7678     if (!fp_access_check(s)) {
7679         return;
7680     }
7681
7682     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
7683 }
7684
7685 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
7686 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
7687                                          bool is_q, bool is_u,
7688                                          int immh, int immb, int rn, int rd)
7689 {
7690     int immhb = immh << 3 | immb;
7691     int pass, size, fracbits;
7692     TCGv_ptr tcg_fpstatus;
7693     TCGv_i32 tcg_rmode, tcg_shift;
7694
7695     if (immh & 0x8) {
7696         size = MO_64;
7697         if (!is_scalar && !is_q) {
7698             unallocated_encoding(s);
7699             return;
7700         }
7701     } else if (immh & 0x4) {
7702         size = MO_32;
7703     } else if (immh & 0x2) {
7704         size = MO_16;
7705         if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
7706             unallocated_encoding(s);
7707             return;
7708         }
7709     } else {
7710         /* Should have split out AdvSIMD modified immediate earlier.  */
7711         assert(immh == 1);
7712         unallocated_encoding(s);
7713         return;
7714     }
7715
7716     if (!fp_access_check(s)) {
7717         return;
7718     }
7719
7720     assert(!(is_scalar && is_q));
7721
7722     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
7723     tcg_fpstatus = get_fpstatus_ptr(size == MO_16);
7724     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7725     fracbits = (16 << size) - immhb;
7726     tcg_shift = tcg_const_i32(fracbits);
7727
7728     if (size == MO_64) {
7729         int maxpass = is_scalar ? 1 : 2;
7730
7731         for (pass = 0; pass < maxpass; pass++) {
7732             TCGv_i64 tcg_op = tcg_temp_new_i64();
7733
7734             read_vec_element(s, tcg_op, rn, pass, MO_64);
7735             if (is_u) {
7736                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7737             } else {
7738                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7739             }
7740             write_vec_element(s, tcg_op, rd, pass, MO_64);
7741             tcg_temp_free_i64(tcg_op);
7742         }
7743         clear_vec_high(s, is_q, rd);
7744     } else {
7745         void (*fn)(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
7746         int maxpass = is_scalar ? 1 : ((8 << is_q) >> size);
7747
7748         switch (size) {
7749         case MO_16:
7750             if (is_u) {
7751                 fn = gen_helper_vfp_touhh;
7752             } else {
7753                 fn = gen_helper_vfp_toshh;
7754             }
7755             break;
7756         case MO_32:
7757             if (is_u) {
7758                 fn = gen_helper_vfp_touls;
7759             } else {
7760                 fn = gen_helper_vfp_tosls;
7761             }
7762             break;
7763         default:
7764             g_assert_not_reached();
7765         }
7766
7767         for (pass = 0; pass < maxpass; pass++) {
7768             TCGv_i32 tcg_op = tcg_temp_new_i32();
7769
7770             read_vec_element_i32(s, tcg_op, rn, pass, size);
7771             fn(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7772             if (is_scalar) {
7773                 write_fp_sreg(s, rd, tcg_op);
7774             } else {
7775                 write_vec_element_i32(s, tcg_op, rd, pass, size);
7776             }
7777             tcg_temp_free_i32(tcg_op);
7778         }
7779         if (!is_scalar) {
7780             clear_vec_high(s, is_q, rd);
7781         }
7782     }
7783
7784     tcg_temp_free_ptr(tcg_fpstatus);
7785     tcg_temp_free_i32(tcg_shift);
7786     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7787     tcg_temp_free_i32(tcg_rmode);
7788 }
7789
7790 /* AdvSIMD scalar shift by immediate
7791  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
7792  * +-----+---+-------------+------+------+--------+---+------+------+
7793  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
7794  * +-----+---+-------------+------+------+--------+---+------+------+
7795  *
7796  * This is the scalar version so it works on a fixed sized registers
7797  */
7798 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
7799 {
7800     int rd = extract32(insn, 0, 5);
7801     int rn = extract32(insn, 5, 5);
7802     int opcode = extract32(insn, 11, 5);
7803     int immb = extract32(insn, 16, 3);
7804     int immh = extract32(insn, 19, 4);
7805     bool is_u = extract32(insn, 29, 1);
7806
7807     if (immh == 0) {
7808         unallocated_encoding(s);
7809         return;
7810     }
7811
7812     switch (opcode) {
7813     case 0x08: /* SRI */
7814         if (!is_u) {
7815             unallocated_encoding(s);
7816             return;
7817         }
7818         /* fall through */
7819     case 0x00: /* SSHR / USHR */
7820     case 0x02: /* SSRA / USRA */
7821     case 0x04: /* SRSHR / URSHR */
7822     case 0x06: /* SRSRA / URSRA */
7823         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
7824         break;
7825     case 0x0a: /* SHL / SLI */
7826         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
7827         break;
7828     case 0x1c: /* SCVTF, UCVTF */
7829         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
7830                                      opcode, rn, rd);
7831         break;
7832     case 0x10: /* SQSHRUN, SQSHRUN2 */
7833     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
7834         if (!is_u) {
7835             unallocated_encoding(s);
7836             return;
7837         }
7838         handle_vec_simd_sqshrn(s, true, false, false, true,
7839                                immh, immb, opcode, rn, rd);
7840         break;
7841     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
7842     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
7843         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7844                                immh, immb, opcode, rn, rd);
7845         break;
7846     case 0xc: /* SQSHLU */
7847         if (!is_u) {
7848             unallocated_encoding(s);
7849             return;
7850         }
7851         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7852         break;
7853     case 0xe: /* SQSHL, UQSHL */
7854         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7855         break;
7856     case 0x1f: /* FCVTZS, FCVTZU */
7857         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7858         break;
7859     default:
7860         unallocated_encoding(s);
7861         break;
7862     }
7863 }
7864
7865 /* AdvSIMD scalar three different
7866  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7867  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7868  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7869  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7870  */
7871 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7872 {
7873     bool is_u = extract32(insn, 29, 1);
7874     int size = extract32(insn, 22, 2);
7875     int opcode = extract32(insn, 12, 4);
7876     int rm = extract32(insn, 16, 5);
7877     int rn = extract32(insn, 5, 5);
7878     int rd = extract32(insn, 0, 5);
7879
7880     if (is_u) {
7881         unallocated_encoding(s);
7882         return;
7883     }
7884
7885     switch (opcode) {
7886     case 0x9: /* SQDMLAL, SQDMLAL2 */
7887     case 0xb: /* SQDMLSL, SQDMLSL2 */
7888     case 0xd: /* SQDMULL, SQDMULL2 */
7889         if (size == 0 || size == 3) {
7890             unallocated_encoding(s);
7891             return;
7892         }
7893         break;
7894     default:
7895         unallocated_encoding(s);
7896         return;
7897     }
7898
7899     if (!fp_access_check(s)) {
7900         return;
7901     }
7902
7903     if (size == 2) {
7904         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7905         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7906         TCGv_i64 tcg_res = tcg_temp_new_i64();
7907
7908         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7909         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7910
7911         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7912         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7913
7914         switch (opcode) {
7915         case 0xd: /* SQDMULL, SQDMULL2 */
7916             break;
7917         case 0xb: /* SQDMLSL, SQDMLSL2 */
7918             tcg_gen_neg_i64(tcg_res, tcg_res);
7919             /* fall through */
7920         case 0x9: /* SQDMLAL, SQDMLAL2 */
7921             read_vec_element(s, tcg_op1, rd, 0, MO_64);
7922             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7923                                               tcg_res, tcg_op1);
7924             break;
7925         default:
7926             g_assert_not_reached();
7927         }
7928
7929         write_fp_dreg(s, rd, tcg_res);
7930
7931         tcg_temp_free_i64(tcg_op1);
7932         tcg_temp_free_i64(tcg_op2);
7933         tcg_temp_free_i64(tcg_res);
7934     } else {
7935         TCGv_i32 tcg_op1 = read_fp_hreg(s, rn);
7936         TCGv_i32 tcg_op2 = read_fp_hreg(s, rm);
7937         TCGv_i64 tcg_res = tcg_temp_new_i64();
7938
7939         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7940         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7941
7942         switch (opcode) {
7943         case 0xd: /* SQDMULL, SQDMULL2 */
7944             break;
7945         case 0xb: /* SQDMLSL, SQDMLSL2 */
7946             gen_helper_neon_negl_u32(tcg_res, tcg_res);
7947             /* fall through */
7948         case 0x9: /* SQDMLAL, SQDMLAL2 */
7949         {
7950             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7951             read_vec_element(s, tcg_op3, rd, 0, MO_32);
7952             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7953                                               tcg_res, tcg_op3);
7954             tcg_temp_free_i64(tcg_op3);
7955             break;
7956         }
7957         default:
7958             g_assert_not_reached();
7959         }
7960
7961         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7962         write_fp_dreg(s, rd, tcg_res);
7963
7964         tcg_temp_free_i32(tcg_op1);
7965         tcg_temp_free_i32(tcg_op2);
7966         tcg_temp_free_i64(tcg_res);
7967     }
7968 }
7969
7970 /* CMTST : test is "if (X & Y != 0)". */
7971 static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
7972 {
7973     tcg_gen_and_i32(d, a, b);
7974     tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
7975     tcg_gen_neg_i32(d, d);
7976 }
7977
7978 static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
7979 {
7980     tcg_gen_and_i64(d, a, b);
7981     tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
7982     tcg_gen_neg_i64(d, d);
7983 }
7984
7985 static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
7986 {
7987     tcg_gen_and_vec(vece, d, a, b);
7988     tcg_gen_dupi_vec(vece, a, 0);
7989     tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
7990 }
7991
7992 static void handle_3same_64(DisasContext *s, int opcode, bool u,
7993                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7994 {
7995     /* Handle 64x64->64 opcodes which are shared between the scalar
7996      * and vector 3-same groups. We cover every opcode where size == 3
7997      * is valid in either the three-reg-same (integer, not pairwise)
7998      * or scalar-three-reg-same groups.
7999      */
8000     TCGCond cond;
8001
8002     switch (opcode) {
8003     case 0x1: /* SQADD */
8004         if (u) {
8005             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8006         } else {
8007             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8008         }
8009         break;
8010     case 0x5: /* SQSUB */
8011         if (u) {
8012             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8013         } else {
8014             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8015         }
8016         break;
8017     case 0x6: /* CMGT, CMHI */
8018         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
8019          * We implement this using setcond (test) and then negating.
8020          */
8021         cond = u ? TCG_COND_GTU : TCG_COND_GT;
8022     do_cmop:
8023         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
8024         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8025         break;
8026     case 0x7: /* CMGE, CMHS */
8027         cond = u ? TCG_COND_GEU : TCG_COND_GE;
8028         goto do_cmop;
8029     case 0x11: /* CMTST, CMEQ */
8030         if (u) {
8031             cond = TCG_COND_EQ;
8032             goto do_cmop;
8033         }
8034         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
8035         break;
8036     case 0x8: /* SSHL, USHL */
8037         if (u) {
8038             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
8039         } else {
8040             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
8041         }
8042         break;
8043     case 0x9: /* SQSHL, UQSHL */
8044         if (u) {
8045             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8046         } else {
8047             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8048         }
8049         break;
8050     case 0xa: /* SRSHL, URSHL */
8051         if (u) {
8052             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
8053         } else {
8054             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
8055         }
8056         break;
8057     case 0xb: /* SQRSHL, UQRSHL */
8058         if (u) {
8059             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8060         } else {
8061             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
8062         }
8063         break;
8064     case 0x10: /* ADD, SUB */
8065         if (u) {
8066             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
8067         } else {
8068             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
8069         }
8070         break;
8071     default:
8072         g_assert_not_reached();
8073     }
8074 }
8075
8076 /* Handle the 3-same-operands float operations; shared by the scalar
8077  * and vector encodings. The caller must filter out any encodings
8078  * not allocated for the encoding it is dealing with.
8079  */
8080 static void handle_3same_float(DisasContext *s, int size, int elements,
8081                                int fpopcode, int rd, int rn, int rm)
8082 {
8083     int pass;
8084     TCGv_ptr fpst = get_fpstatus_ptr(false);
8085
8086     for (pass = 0; pass < elements; pass++) {
8087         if (size) {
8088             /* Double */
8089             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
8090             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
8091             TCGv_i64 tcg_res = tcg_temp_new_i64();
8092
8093             read_vec_element(s, tcg_op1, rn, pass, MO_64);
8094             read_vec_element(s, tcg_op2, rm, pass, MO_64);
8095
8096             switch (fpopcode) {
8097             case 0x39: /* FMLS */
8098                 /* As usual for ARM, separate negation for fused multiply-add */
8099                 gen_helper_vfp_negd(tcg_op1, tcg_op1);
8100                 /* fall through */
8101             case 0x19: /* FMLA */
8102                 read_vec_element(s, tcg_res, rd, pass, MO_64);
8103                 gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
8104                                        tcg_res, fpst);
8105                 break;
8106             case 0x18: /* FMAXNM */
8107                 gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8108                 break;
8109             case 0x1a: /* FADD */
8110                 gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
8111                 break;
8112             case 0x1b: /* FMULX */
8113                 gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
8114                 break;
8115             case 0x1c: /* FCMEQ */
8116                 gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8117                 break;
8118             case 0x1e: /* FMAX */
8119                 gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
8120                 break;
8121             case 0x1f: /* FRECPS */
8122                 gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8123                 break;
8124             case 0x38: /* FMINNM */
8125                 gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
8126                 break;
8127             case 0x3a: /* FSUB */
8128                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8129                 break;
8130             case 0x3e: /* FMIN */
8131                 gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
8132                 break;
8133             case 0x3f: /* FRSQRTS */
8134                 gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8135                 break;
8136             case 0x5b: /* FMUL */
8137                 gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
8138                 break;
8139             case 0x5c: /* FCMGE */
8140                 gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8141                 break;
8142             case 0x5d: /* FACGE */
8143                 gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8144                 break;
8145             case 0x5f: /* FDIV */
8146                 gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
8147                 break;
8148             case 0x7a: /* FABD */
8149                 gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
8150                 gen_helper_vfp_absd(tcg_res, tcg_res);
8151                 break;
8152             case 0x7c: /* FCMGT */
8153                 gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8154                 break;
8155             case 0x7d: /* FACGT */
8156                 gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
8157                 break;
8158             default:
8159                 g_assert_not_reached();
8160             }
8161
8162             write_vec_element(s, tcg_res, rd, pass, MO_64);
8163
8164             tcg_temp_free_i64(tcg_res);
8165             tcg_temp_free_i64(tcg_op1);
8166             tcg_temp_free_i64(tcg_op2);
8167         } else {
8168             /* Single */
8169             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
8170             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
8171             TCGv_i32 tcg_res = tcg_temp_new_i32();
8172
8173             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
8174             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
8175
8176             switch (fpopcode) {
8177             case 0x39: /* FMLS */
8178                 /* As usual for ARM, separate negation for fused multiply-add */
8179                 gen_helper_vfp_negs(tcg_op1, tcg_op1);
8180                 /* fall through */
8181             case 0x19: /* FMLA */
8182                 read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8183                 gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
8184                                        tcg_res, fpst);
8185                 break;
8186             case 0x1a: /* FADD */
8187                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
8188                 break;
8189             case 0x1b: /* FMULX */
8190                 gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
8191                 break;
8192             case 0x1c: /* FCMEQ */
8193                 gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8194                 break;
8195             case 0x1e: /* FMAX */
8196                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
8197                 break;
8198             case 0x1f: /* FRECPS */
8199                 gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8200                 break;
8201             case 0x18: /* FMAXNM */
8202                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
8203                 break;
8204             case 0x38: /* FMINNM */
8205                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
8206                 break;
8207             case 0x3a: /* FSUB */
8208                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8209                 break;
8210             case 0x3e: /* FMIN */
8211                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
8212                 break;
8213             case 0x3f: /* FRSQRTS */
8214                 gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8215                 break;
8216             case 0x5b: /* FMUL */
8217                 gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
8218                 break;
8219             case 0x5c: /* FCMGE */
8220                 gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8221                 break;
8222             case 0x5d: /* FACGE */
8223                 gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8224                 break;
8225             case 0x5f: /* FDIV */
8226                 gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
8227                 break;
8228             case 0x7a: /* FABD */
8229                 gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
8230                 gen_helper_vfp_abss(tcg_res, tcg_res);
8231                 break;
8232             case 0x7c: /* FCMGT */
8233                 gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8234                 break;
8235             case 0x7d: /* FACGT */
8236                 gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
8237                 break;
8238             default:
8239                 g_assert_not_reached();
8240             }
8241
8242             if (elements == 1) {
8243                 /* scalar single so clear high part */
8244                 TCGv_i64 tcg_tmp = tcg_temp_new_i64();
8245
8246                 tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
8247                 write_vec_element(s, tcg_tmp, rd, pass, MO_64);
8248                 tcg_temp_free_i64(tcg_tmp);
8249             } else {
8250                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8251             }
8252
8253             tcg_temp_free_i32(tcg_res);
8254             tcg_temp_free_i32(tcg_op1);
8255             tcg_temp_free_i32(tcg_op2);
8256         }
8257     }
8258
8259     tcg_temp_free_ptr(fpst);
8260
8261     clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
8262 }
8263
8264 /* AdvSIMD scalar three same
8265  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
8266  * +-----+---+-----------+------+---+------+--------+---+------+------+
8267  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
8268  * +-----+---+-----------+------+---+------+--------+---+------+------+
8269  */
8270 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
8271 {
8272     int rd = extract32(insn, 0, 5);
8273     int rn = extract32(insn, 5, 5);
8274     int opcode = extract32(insn, 11, 5);
8275     int rm = extract32(insn, 16, 5);
8276     int size = extract32(insn, 22, 2);
8277     bool u = extract32(insn, 29, 1);
8278     TCGv_i64 tcg_rd;
8279
8280     if (opcode >= 0x18) {
8281         /* Floating point: U, size[1] and opcode indicate operation */
8282         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
8283         switch (fpopcode) {
8284         case 0x1b: /* FMULX */
8285         case 0x1f: /* FRECPS */
8286         case 0x3f: /* FRSQRTS */
8287         case 0x5d: /* FACGE */
8288         case 0x7d: /* FACGT */
8289         case 0x1c: /* FCMEQ */
8290         case 0x5c: /* FCMGE */
8291         case 0x7c: /* FCMGT */
8292         case 0x7a: /* FABD */
8293             break;
8294         default:
8295             unallocated_encoding(s);
8296             return;
8297         }
8298
8299         if (!fp_access_check(s)) {
8300             return;
8301         }
8302
8303         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
8304         return;
8305     }
8306
8307     switch (opcode) {
8308     case 0x1: /* SQADD, UQADD */
8309     case 0x5: /* SQSUB, UQSUB */
8310     case 0x9: /* SQSHL, UQSHL */
8311     case 0xb: /* SQRSHL, UQRSHL */
8312         break;
8313     case 0x8: /* SSHL, USHL */
8314     case 0xa: /* SRSHL, URSHL */
8315     case 0x6: /* CMGT, CMHI */
8316     case 0x7: /* CMGE, CMHS */
8317     case 0x11: /* CMTST, CMEQ */
8318     case 0x10: /* ADD, SUB (vector) */
8319         if (size != 3) {
8320             unallocated_encoding(s);
8321             return;
8322         }
8323         break;
8324     case 0x16: /* SQDMULH, SQRDMULH (vector) */
8325         if (size != 1 && size != 2) {
8326             unallocated_encoding(s);
8327             return;
8328         }
8329         break;
8330     default:
8331         unallocated_encoding(s);
8332         return;
8333     }
8334
8335     if (!fp_access_check(s)) {
8336         return;
8337     }
8338
8339     tcg_rd = tcg_temp_new_i64();
8340
8341     if (size == 3) {
8342         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8343         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
8344
8345         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
8346         tcg_temp_free_i64(tcg_rn);
8347         tcg_temp_free_i64(tcg_rm);
8348     } else {
8349         /* Do a single operation on the lowest element in the vector.
8350          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
8351          * no side effects for all these operations.
8352          * OPTME: special-purpose helpers would avoid doing some
8353          * unnecessary work in the helper for the 8 and 16 bit cases.
8354          */
8355         NeonGenTwoOpEnvFn *genenvfn;
8356         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8357         TCGv_i32 tcg_rm = tcg_temp_new_i32();
8358         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
8359
8360         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8361         read_vec_element_i32(s, tcg_rm, rm, 0, size);
8362
8363         switch (opcode) {
8364         case 0x1: /* SQADD, UQADD */
8365         {
8366             static NeonGenTwoOpEnvFn * const fns[3][2] = {
8367                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
8368                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
8369                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
8370             };
8371             genenvfn = fns[size][u];
8372             break;
8373         }
8374         case 0x5: /* SQSUB, UQSUB */
8375         {
8376             static NeonGenTwoOpEnvFn * const fns[3][2] = {
8377                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
8378                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
8379                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
8380             };
8381             genenvfn = fns[size][u];
8382             break;
8383         }
8384         case 0x9: /* SQSHL, UQSHL */
8385         {
8386             static NeonGenTwoOpEnvFn * const fns[3][2] = {
8387                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
8388                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
8389                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
8390             };
8391             genenvfn = fns[size][u];
8392             break;
8393         }
8394         case 0xb: /* SQRSHL, UQRSHL */
8395         {
8396             static NeonGenTwoOpEnvFn * const fns[3][2] = {
8397                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
8398                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
8399                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
8400             };
8401             genenvfn = fns[size][u];
8402             break;
8403         }
8404         case 0x16: /* SQDMULH, SQRDMULH */
8405         {
8406             static NeonGenTwoOpEnvFn * const fns[2][2] = {
8407                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
8408                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
8409             };
8410             assert(size == 1 || size == 2);
8411             genenvfn = fns[size - 1][u];
8412             break;
8413         }
8414         default:
8415             g_assert_not_reached();
8416         }
8417
8418         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
8419         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
8420         tcg_temp_free_i32(tcg_rd32);
8421         tcg_temp_free_i32(tcg_rn);
8422         tcg_temp_free_i32(tcg_rm);
8423     }
8424
8425     write_fp_dreg(s, rd, tcg_rd);
8426
8427     tcg_temp_free_i64(tcg_rd);
8428 }
8429
8430 /* AdvSIMD scalar three same FP16
8431  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
8432  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
8433  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
8434  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
8435  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
8436  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
8437  */
8438 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
8439                                                   uint32_t insn)
8440 {
8441     int rd = extract32(insn, 0, 5);
8442     int rn = extract32(insn, 5, 5);
8443     int opcode = extract32(insn, 11, 3);
8444     int rm = extract32(insn, 16, 5);
8445     bool u = extract32(insn, 29, 1);
8446     bool a = extract32(insn, 23, 1);
8447     int fpopcode = opcode | (a << 3) |  (u << 4);
8448     TCGv_ptr fpst;
8449     TCGv_i32 tcg_op1;
8450     TCGv_i32 tcg_op2;
8451     TCGv_i32 tcg_res;
8452
8453     switch (fpopcode) {
8454     case 0x03: /* FMULX */
8455     case 0x04: /* FCMEQ (reg) */
8456     case 0x07: /* FRECPS */
8457     case 0x0f: /* FRSQRTS */
8458     case 0x14: /* FCMGE (reg) */
8459     case 0x15: /* FACGE */
8460     case 0x1a: /* FABD */
8461     case 0x1c: /* FCMGT (reg) */
8462     case 0x1d: /* FACGT */
8463         break;
8464     default:
8465         unallocated_encoding(s);
8466         return;
8467     }
8468
8469     if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
8470         unallocated_encoding(s);
8471     }
8472
8473     if (!fp_access_check(s)) {
8474         return;
8475     }
8476
8477     fpst = get_fpstatus_ptr(true);
8478
8479     tcg_op1 = read_fp_hreg(s, rn);
8480     tcg_op2 = read_fp_hreg(s, rm);
8481     tcg_res = tcg_temp_new_i32();
8482
8483     switch (fpopcode) {
8484     case 0x03: /* FMULX */
8485         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
8486         break;
8487     case 0x04: /* FCMEQ (reg) */
8488         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8489         break;
8490     case 0x07: /* FRECPS */
8491         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8492         break;
8493     case 0x0f: /* FRSQRTS */
8494         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8495         break;
8496     case 0x14: /* FCMGE (reg) */
8497         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8498         break;
8499     case 0x15: /* FACGE */
8500         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8501         break;
8502     case 0x1a: /* FABD */
8503         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
8504         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
8505         break;
8506     case 0x1c: /* FCMGT (reg) */
8507         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8508         break;
8509     case 0x1d: /* FACGT */
8510         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
8511         break;
8512     default:
8513         g_assert_not_reached();
8514     }
8515
8516     write_fp_sreg(s, rd, tcg_res);
8517
8518
8519     tcg_temp_free_i32(tcg_res);
8520     tcg_temp_free_i32(tcg_op1);
8521     tcg_temp_free_i32(tcg_op2);
8522     tcg_temp_free_ptr(fpst);
8523 }
8524
8525 /* AdvSIMD scalar three same extra
8526  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
8527  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
8528  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
8529  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
8530  */
8531 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
8532                                                    uint32_t insn)
8533 {
8534     int rd = extract32(insn, 0, 5);
8535     int rn = extract32(insn, 5, 5);
8536     int opcode = extract32(insn, 11, 4);
8537     int rm = extract32(insn, 16, 5);
8538     int size = extract32(insn, 22, 2);
8539     bool u = extract32(insn, 29, 1);
8540     TCGv_i32 ele1, ele2, ele3;
8541     TCGv_i64 res;
8542     int feature;
8543
8544     switch (u * 16 + opcode) {
8545     case 0x10: /* SQRDMLAH (vector) */
8546     case 0x11: /* SQRDMLSH (vector) */
8547         if (size != 1 && size != 2) {
8548             unallocated_encoding(s);
8549             return;
8550         }
8551         feature = ARM_FEATURE_V8_RDM;
8552         break;
8553     default:
8554         unallocated_encoding(s);
8555         return;
8556     }
8557     if (!arm_dc_feature(s, feature)) {
8558         unallocated_encoding(s);
8559         return;
8560     }
8561     if (!fp_access_check(s)) {
8562         return;
8563     }
8564
8565     /* Do a single operation on the lowest element in the vector.
8566      * We use the standard Neon helpers and rely on 0 OP 0 == 0
8567      * with no side effects for all these operations.
8568      * OPTME: special-purpose helpers would avoid doing some
8569      * unnecessary work in the helper for the 16 bit cases.
8570      */
8571     ele1 = tcg_temp_new_i32();
8572     ele2 = tcg_temp_new_i32();
8573     ele3 = tcg_temp_new_i32();
8574
8575     read_vec_element_i32(s, ele1, rn, 0, size);
8576     read_vec_element_i32(s, ele2, rm, 0, size);
8577     read_vec_element_i32(s, ele3, rd, 0, size);
8578
8579     switch (opcode) {
8580     case 0x0: /* SQRDMLAH */
8581         if (size == 1) {
8582             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
8583         } else {
8584             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
8585         }
8586         break;
8587     case 0x1: /* SQRDMLSH */
8588         if (size == 1) {
8589             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
8590         } else {
8591             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
8592         }
8593         break;
8594     default:
8595         g_assert_not_reached();
8596     }
8597     tcg_temp_free_i32(ele1);
8598     tcg_temp_free_i32(ele2);
8599
8600     res = tcg_temp_new_i64();
8601     tcg_gen_extu_i32_i64(res, ele3);
8602     tcg_temp_free_i32(ele3);
8603
8604     write_fp_dreg(s, rd, res);
8605     tcg_temp_free_i64(res);
8606 }
8607
8608 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
8609                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
8610                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
8611 {
8612     /* Handle 64->64 opcodes which are shared between the scalar and
8613      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
8614      * is valid in either group and also the double-precision fp ops.
8615      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
8616      * requires them.
8617      */
8618     TCGCond cond;
8619
8620     switch (opcode) {
8621     case 0x4: /* CLS, CLZ */
8622         if (u) {
8623             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8624         } else {
8625             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
8626         }
8627         break;
8628     case 0x5: /* NOT */
8629         /* This opcode is shared with CNT and RBIT but we have earlier
8630          * enforced that size == 3 if and only if this is the NOT insn.
8631          */
8632         tcg_gen_not_i64(tcg_rd, tcg_rn);
8633         break;
8634     case 0x7: /* SQABS, SQNEG */
8635         if (u) {
8636             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
8637         } else {
8638             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
8639         }
8640         break;
8641     case 0xa: /* CMLT */
8642         /* 64 bit integer comparison against zero, result is
8643          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
8644          * subtracting 1.
8645          */
8646         cond = TCG_COND_LT;
8647     do_cmop:
8648         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
8649         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8650         break;
8651     case 0x8: /* CMGT, CMGE */
8652         cond = u ? TCG_COND_GE : TCG_COND_GT;
8653         goto do_cmop;
8654     case 0x9: /* CMEQ, CMLE */
8655         cond = u ? TCG_COND_LE : TCG_COND_EQ;
8656         goto do_cmop;
8657     case 0xb: /* ABS, NEG */
8658         if (u) {
8659             tcg_gen_neg_i64(tcg_rd, tcg_rn);
8660         } else {
8661             TCGv_i64 tcg_zero = tcg_const_i64(0);
8662             tcg_gen_neg_i64(tcg_rd, tcg_rn);
8663             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
8664                                 tcg_rn, tcg_rd);
8665             tcg_temp_free_i64(tcg_zero);
8666         }
8667         break;
8668     case 0x2f: /* FABS */
8669         gen_helper_vfp_absd(tcg_rd, tcg_rn);
8670         break;
8671     case 0x6f: /* FNEG */
8672         gen_helper_vfp_negd(tcg_rd, tcg_rn);
8673         break;
8674     case 0x7f: /* FSQRT */
8675         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
8676         break;
8677     case 0x1a: /* FCVTNS */
8678     case 0x1b: /* FCVTMS */
8679     case 0x1c: /* FCVTAS */
8680     case 0x3a: /* FCVTPS */
8681     case 0x3b: /* FCVTZS */
8682     {
8683         TCGv_i32 tcg_shift = tcg_const_i32(0);
8684         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8685         tcg_temp_free_i32(tcg_shift);
8686         break;
8687     }
8688     case 0x5a: /* FCVTNU */
8689     case 0x5b: /* FCVTMU */
8690     case 0x5c: /* FCVTAU */
8691     case 0x7a: /* FCVTPU */
8692     case 0x7b: /* FCVTZU */
8693     {
8694         TCGv_i32 tcg_shift = tcg_const_i32(0);
8695         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8696         tcg_temp_free_i32(tcg_shift);
8697         break;
8698     }
8699     case 0x18: /* FRINTN */
8700     case 0x19: /* FRINTM */
8701     case 0x38: /* FRINTP */
8702     case 0x39: /* FRINTZ */
8703     case 0x58: /* FRINTA */
8704     case 0x79: /* FRINTI */
8705         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
8706         break;
8707     case 0x59: /* FRINTX */
8708         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
8709         break;
8710     default:
8711         g_assert_not_reached();
8712     }
8713 }
8714
8715 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
8716                                    bool is_scalar, bool is_u, bool is_q,
8717                                    int size, int rn, int rd)
8718 {
8719     bool is_double = (size == MO_64);
8720     TCGv_ptr fpst;
8721
8722     if (!fp_access_check(s)) {
8723         return;
8724     }
8725
8726     fpst = get_fpstatus_ptr(size == MO_16);
8727
8728     if (is_double) {
8729         TCGv_i64 tcg_op = tcg_temp_new_i64();
8730         TCGv_i64 tcg_zero = tcg_const_i64(0);
8731         TCGv_i64 tcg_res = tcg_temp_new_i64();
8732         NeonGenTwoDoubleOPFn *genfn;
8733         bool swap = false;
8734         int pass;
8735
8736         switch (opcode) {
8737         case 0x2e: /* FCMLT (zero) */
8738             swap = true;
8739             /* fallthrough */
8740         case 0x2c: /* FCMGT (zero) */
8741             genfn = gen_helper_neon_cgt_f64;
8742             break;
8743         case 0x2d: /* FCMEQ (zero) */
8744             genfn = gen_helper_neon_ceq_f64;
8745             break;
8746         case 0x6d: /* FCMLE (zero) */
8747             swap = true;
8748             /* fall through */
8749         case 0x6c: /* FCMGE (zero) */
8750             genfn = gen_helper_neon_cge_f64;
8751             break;
8752         default:
8753             g_assert_not_reached();
8754         }
8755
8756         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8757             read_vec_element(s, tcg_op, rn, pass, MO_64);
8758             if (swap) {
8759                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
8760             } else {
8761                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
8762             }
8763             write_vec_element(s, tcg_res, rd, pass, MO_64);
8764         }
8765         tcg_temp_free_i64(tcg_res);
8766         tcg_temp_free_i64(tcg_zero);
8767         tcg_temp_free_i64(tcg_op);
8768
8769         clear_vec_high(s, !is_scalar, rd);
8770     } else {
8771         TCGv_i32 tcg_op = tcg_temp_new_i32();
8772         TCGv_i32 tcg_zero = tcg_const_i32(0);
8773         TCGv_i32 tcg_res = tcg_temp_new_i32();
8774         NeonGenTwoSingleOPFn *genfn;
8775         bool swap = false;
8776         int pass, maxpasses;
8777
8778         if (size == MO_16) {
8779             switch (opcode) {
8780             case 0x2e: /* FCMLT (zero) */
8781                 swap = true;
8782                 /* fall through */
8783             case 0x2c: /* FCMGT (zero) */
8784                 genfn = gen_helper_advsimd_cgt_f16;
8785                 break;
8786             case 0x2d: /* FCMEQ (zero) */
8787                 genfn = gen_helper_advsimd_ceq_f16;
8788                 break;
8789             case 0x6d: /* FCMLE (zero) */
8790                 swap = true;
8791                 /* fall through */
8792             case 0x6c: /* FCMGE (zero) */
8793                 genfn = gen_helper_advsimd_cge_f16;
8794                 break;
8795             default:
8796                 g_assert_not_reached();
8797             }
8798         } else {
8799             switch (opcode) {
8800             case 0x2e: /* FCMLT (zero) */
8801                 swap = true;
8802                 /* fall through */
8803             case 0x2c: /* FCMGT (zero) */
8804                 genfn = gen_helper_neon_cgt_f32;
8805                 break;
8806             case 0x2d: /* FCMEQ (zero) */
8807                 genfn = gen_helper_neon_ceq_f32;
8808                 break;
8809             case 0x6d: /* FCMLE (zero) */
8810                 swap = true;
8811                 /* fall through */
8812             case 0x6c: /* FCMGE (zero) */
8813                 genfn = gen_helper_neon_cge_f32;
8814                 break;
8815             default:
8816                 g_assert_not_reached();
8817             }
8818         }
8819
8820         if (is_scalar) {
8821             maxpasses = 1;
8822         } else {
8823             int vector_size = 8 << is_q;
8824             maxpasses = vector_size >> size;
8825         }
8826
8827         for (pass = 0; pass < maxpasses; pass++) {
8828             read_vec_element_i32(s, tcg_op, rn, pass, size);
8829             if (swap) {
8830                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
8831             } else {
8832                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
8833             }
8834             if (is_scalar) {
8835                 write_fp_sreg(s, rd, tcg_res);
8836             } else {
8837                 write_vec_element_i32(s, tcg_res, rd, pass, size);
8838             }
8839         }
8840         tcg_temp_free_i32(tcg_res);
8841         tcg_temp_free_i32(tcg_zero);
8842         tcg_temp_free_i32(tcg_op);
8843         if (!is_scalar) {
8844             clear_vec_high(s, is_q, rd);
8845         }
8846     }
8847
8848     tcg_temp_free_ptr(fpst);
8849 }
8850
8851 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
8852                                     bool is_scalar, bool is_u, bool is_q,
8853                                     int size, int rn, int rd)
8854 {
8855     bool is_double = (size == 3);
8856     TCGv_ptr fpst = get_fpstatus_ptr(false);
8857
8858     if (is_double) {
8859         TCGv_i64 tcg_op = tcg_temp_new_i64();
8860         TCGv_i64 tcg_res = tcg_temp_new_i64();
8861         int pass;
8862
8863         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8864             read_vec_element(s, tcg_op, rn, pass, MO_64);
8865             switch (opcode) {
8866             case 0x3d: /* FRECPE */
8867                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
8868                 break;
8869             case 0x3f: /* FRECPX */
8870                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
8871                 break;
8872             case 0x7d: /* FRSQRTE */
8873                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
8874                 break;
8875             default:
8876                 g_assert_not_reached();
8877             }
8878             write_vec_element(s, tcg_res, rd, pass, MO_64);
8879         }
8880         tcg_temp_free_i64(tcg_res);
8881         tcg_temp_free_i64(tcg_op);
8882         clear_vec_high(s, !is_scalar, rd);
8883     } else {
8884         TCGv_i32 tcg_op = tcg_temp_new_i32();
8885         TCGv_i32 tcg_res = tcg_temp_new_i32();
8886         int pass, maxpasses;
8887
8888         if (is_scalar) {
8889             maxpasses = 1;
8890         } else {
8891             maxpasses = is_q ? 4 : 2;
8892         }
8893
8894         for (pass = 0; pass < maxpasses; pass++) {
8895             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
8896
8897             switch (opcode) {
8898             case 0x3c: /* URECPE */
8899                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
8900                 break;
8901             case 0x3d: /* FRECPE */
8902                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
8903                 break;
8904             case 0x3f: /* FRECPX */
8905                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
8906                 break;
8907             case 0x7d: /* FRSQRTE */
8908                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
8909                 break;
8910             default:
8911                 g_assert_not_reached();
8912             }
8913
8914             if (is_scalar) {
8915                 write_fp_sreg(s, rd, tcg_res);
8916             } else {
8917                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8918             }
8919         }
8920         tcg_temp_free_i32(tcg_res);
8921         tcg_temp_free_i32(tcg_op);
8922         if (!is_scalar) {
8923             clear_vec_high(s, is_q, rd);
8924         }
8925     }
8926     tcg_temp_free_ptr(fpst);
8927 }
8928
8929 static void handle_2misc_narrow(DisasContext *s, bool scalar,
8930                                 int opcode, bool u, bool is_q,
8931                                 int size, int rn, int rd)
8932 {
8933     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
8934      * in the source becomes a size element in the destination).
8935      */
8936     int pass;
8937     TCGv_i32 tcg_res[2];
8938     int destelt = is_q ? 2 : 0;
8939     int passes = scalar ? 1 : 2;
8940
8941     if (scalar) {
8942         tcg_res[1] = tcg_const_i32(0);
8943     }
8944
8945     for (pass = 0; pass < passes; pass++) {
8946         TCGv_i64 tcg_op = tcg_temp_new_i64();
8947         NeonGenNarrowFn *genfn = NULL;
8948         NeonGenNarrowEnvFn *genenvfn = NULL;
8949
8950         if (scalar) {
8951             read_vec_element(s, tcg_op, rn, pass, size + 1);
8952         } else {
8953             read_vec_element(s, tcg_op, rn, pass, MO_64);
8954         }
8955         tcg_res[pass] = tcg_temp_new_i32();
8956
8957         switch (opcode) {
8958         case 0x12: /* XTN, SQXTUN */
8959         {
8960             static NeonGenNarrowFn * const xtnfns[3] = {
8961                 gen_helper_neon_narrow_u8,
8962                 gen_helper_neon_narrow_u16,
8963                 tcg_gen_extrl_i64_i32,
8964             };
8965             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
8966                 gen_helper_neon_unarrow_sat8,
8967                 gen_helper_neon_unarrow_sat16,
8968                 gen_helper_neon_unarrow_sat32,
8969             };
8970             if (u) {
8971                 genenvfn = sqxtunfns[size];
8972             } else {
8973                 genfn = xtnfns[size];
8974             }
8975             break;
8976         }
8977         case 0x14: /* SQXTN, UQXTN */
8978         {
8979             static NeonGenNarrowEnvFn * const fns[3][2] = {
8980                 { gen_helper_neon_narrow_sat_s8,
8981                   gen_helper_neon_narrow_sat_u8 },
8982                 { gen_helper_neon_narrow_sat_s16,
8983                   gen_helper_neon_narrow_sat_u16 },
8984                 { gen_helper_neon_narrow_sat_s32,
8985                   gen_helper_neon_narrow_sat_u32 },
8986             };
8987             genenvfn = fns[size][u];
8988             break;
8989         }
8990         case 0x16: /* FCVTN, FCVTN2 */
8991             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
8992             if (size == 2) {
8993                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
8994             } else {
8995                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
8996                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
8997                 TCGv_ptr fpst = get_fpstatus_ptr(false);
8998                 TCGv_i32 ahp = get_ahp_flag();
8999
9000                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
9001                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, fpst, ahp);
9002                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, fpst, ahp);
9003                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
9004                 tcg_temp_free_i32(tcg_lo);
9005                 tcg_temp_free_i32(tcg_hi);
9006                 tcg_temp_free_ptr(fpst);
9007                 tcg_temp_free_i32(ahp);
9008             }
9009             break;
9010         case 0x56:  /* FCVTXN, FCVTXN2 */
9011             /* 64 bit to 32 bit float conversion
9012              * with von Neumann rounding (round to odd)
9013              */
9014             assert(size == 2);
9015             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
9016             break;
9017         default:
9018             g_assert_not_reached();
9019         }
9020
9021         if (genfn) {
9022             genfn(tcg_res[pass], tcg_op);
9023         } else if (genenvfn) {
9024             genenvfn(tcg_res[pass], cpu_env, tcg_op);
9025         }
9026
9027         tcg_temp_free_i64(tcg_op);
9028     }
9029
9030     for (pass = 0; pass < 2; pass++) {
9031         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
9032         tcg_temp_free_i32(tcg_res[pass]);
9033     }
9034     clear_vec_high(s, is_q, rd);
9035 }
9036
9037 /* Remaining saturating accumulating ops */
9038 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
9039                                 bool is_q, int size, int rn, int rd)
9040 {
9041     bool is_double = (size == 3);
9042
9043     if (is_double) {
9044         TCGv_i64 tcg_rn = tcg_temp_new_i64();
9045         TCGv_i64 tcg_rd = tcg_temp_new_i64();
9046         int pass;
9047
9048         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
9049             read_vec_element(s, tcg_rn, rn, pass, MO_64);
9050             read_vec_element(s, tcg_rd, rd, pass, MO_64);
9051
9052             if (is_u) { /* USQADD */
9053                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9054             } else { /* SUQADD */
9055                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9056             }
9057             write_vec_element(s, tcg_rd, rd, pass, MO_64);
9058         }
9059         tcg_temp_free_i64(tcg_rd);
9060         tcg_temp_free_i64(tcg_rn);
9061         clear_vec_high(s, !is_scalar, rd);
9062     } else {
9063         TCGv_i32 tcg_rn = tcg_temp_new_i32();
9064         TCGv_i32 tcg_rd = tcg_temp_new_i32();
9065         int pass, maxpasses;
9066
9067         if (is_scalar) {
9068             maxpasses = 1;
9069         } else {
9070             maxpasses = is_q ? 4 : 2;
9071         }
9072
9073         for (pass = 0; pass < maxpasses; pass++) {
9074             if (is_scalar) {
9075                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
9076                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
9077             } else {
9078                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
9079                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9080             }
9081
9082             if (is_u) { /* USQADD */
9083                 switch (size) {
9084                 case 0:
9085                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9086                     break;
9087                 case 1:
9088                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9089                     break;
9090                 case 2:
9091                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9092                     break;
9093                 default:
9094                     g_assert_not_reached();
9095                 }
9096             } else { /* SUQADD */
9097                 switch (size) {
9098                 case 0:
9099                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9100                     break;
9101                 case 1:
9102                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9103                     break;
9104                 case 2:
9105                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
9106                     break;
9107                 default:
9108                     g_assert_not_reached();
9109                 }
9110             }
9111
9112             if (is_scalar) {
9113                 TCGv_i64 tcg_zero = tcg_const_i64(0);
9114                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
9115                 tcg_temp_free_i64(tcg_zero);
9116             }
9117             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
9118         }
9119         tcg_temp_free_i32(tcg_rd);
9120         tcg_temp_free_i32(tcg_rn);
9121         clear_vec_high(s, is_q, rd);
9122     }
9123 }
9124
/* AdvSIMD scalar two reg misc
 *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
 * +-----+---+-----------+------+-----------+--------+-----+------+------+
 *
 * Decode in two stages.  The first switch validates the encoding and
 * hands several groups (saturating accumulate, narrowing, FP compare
 * against zero, int<->fp conversion, reciprocal estimates) straight to
 * their own handlers.  Whatever "break"s out of it is generated at the
 * bottom of this function: 64-bit operations through handle_2misc_64(),
 * and the 32-bit-or-narrower SQABS/SQNEG and FCVT* forms inline.
 */
static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
{
    int rd = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int opcode = extract32(insn, 12, 5);
    int size = extract32(insn, 22, 2);
    bool u = extract32(insn, 29, 1);
    bool is_fcvt = false;
    /* rmode is only assigned, and only read, on the is_fcvt paths. */
    int rmode;
    TCGv_i32 tcg_rmode;
    TCGv_ptr tcg_fpstatus;

    switch (opcode) {
    case 0x3: /* USQADD / SUQADD */
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_satacc(s, true, u, false, size, rn, rd);
        return;
    case 0x7: /* SQABS / SQNEG */
        break;
    case 0xa: /* CMLT */
        if (u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x8: /* CMGT, CMGE */
    case 0x9: /* CMEQ, CMLE */
    case 0xb: /* ABS, NEG */
        /* The scalar forms of these only exist for 64-bit elements. */
        if (size != 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x12: /* SQXTUN */
        if (!u) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x14: /* SQXTN, UQXTN */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        if (!fp_access_check(s)) {
            return;
        }
        handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
        return;
    case 0xc ... 0xf:
    case 0x16 ... 0x1d:
    case 0x1f:
        /* Floating point: U, size[1] and opcode indicate operation;
         * size[0] indicates single or double precision.
         *
         * From here on opcode is a 7-bit value (U:size[1]:opcode) and
         * size is rewritten to 3 (double) or 2 (single).
         */
        opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
        size = extract32(size, 0, 1) ? 3 : 2;
        switch (opcode) {
        case 0x2c: /* FCMGT (zero) */
        case 0x2d: /* FCMEQ (zero) */
        case 0x2e: /* FCMLT (zero) */
        case 0x6c: /* FCMGE (zero) */
        case 0x6d: /* FCMLE (zero) */
            /* NOTE(review): no fp_access_check() on this path; presumably
             * handle_2misc_fcmp_zero() performs it itself - confirm.
             */
            handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1d: /* SCVTF */
        case 0x5d: /* UCVTF */
        {
            bool is_signed = (opcode == 0x1d);
            if (!fp_access_check(s)) {
                return;
            }
            handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
            return;
        }
        case 0x3d: /* FRECPE */
        case 0x3f: /* FRECPX */
        case 0x7d: /* FRSQRTE */
            if (!fp_access_check(s)) {
                return;
            }
            handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
            return;
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
            is_fcvt = true;
            /* Rounding mode = opcode bit 0 (high) : opcode bit 5 (low). */
            rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
            break;
        case 0x1c: /* FCVTAS */
        case 0x5c: /* FCVTAU */
            /* TIEAWAY doesn't fit in the usual rounding mode encoding */
            is_fcvt = true;
            rmode = FPROUNDING_TIEAWAY;
            break;
        case 0x56: /* FCVTXN, FCVTXN2 */
            /* Only the double-to-single form (size == 3 here) exists. */
            if (size == 2) {
                unallocated_encoding(s);
                return;
            }
            if (!fp_access_check(s)) {
                return;
            }
            /* The narrow handler takes the destination element size. */
            handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
            return;
        default:
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    if (is_fcvt) {
        /* Install the instruction's rounding mode for the conversion.
         * NOTE(review): tcg_rmode is passed as both output and input and
         * the same call is repeated after the conversion, so the helper
         * presumably returns the previous mode - confirm.
         */
        tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
        tcg_fpstatus = get_fpstatus_ptr(false);
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
    } else {
        tcg_rmode = NULL;
        tcg_fpstatus = NULL;
    }

    if (size == 3) {
        /* 64-bit operand: shared with the vector two-reg-misc path. */
        TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
        TCGv_i64 tcg_rd = tcg_temp_new_i64();

        handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
        write_fp_dreg(s, rd, tcg_rd);
        tcg_temp_free_i64(tcg_rd);
        tcg_temp_free_i64(tcg_rn);
    } else {
        TCGv_i32 tcg_rn = tcg_temp_new_i32();
        TCGv_i32 tcg_rd = tcg_temp_new_i32();

        read_vec_element_i32(s, tcg_rn, rn, 0, size);

        switch (opcode) {
        case 0x7: /* SQABS, SQNEG */
        {
            NeonGenOneOpEnvFn *genfn;
            /* Indexed by [element size][u]: u == 0 is SQABS, 1 is SQNEG. */
            static NeonGenOneOpEnvFn * const fns[3][2] = {
                { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
                { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
                { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
            };
            genfn = fns[size][u];
            genfn(tcg_rd, cpu_env, tcg_rn);
            break;
        }
        case 0x1a: /* FCVTNS */
        case 0x1b: /* FCVTMS */
        case 0x1c: /* FCVTAS */
        case 0x3a: /* FCVTPS */
        case 0x3b: /* FCVTZS */
        {
            /* Signed conversion, passing a shift operand of zero. */
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        case 0x5a: /* FCVTNU */
        case 0x5b: /* FCVTMU */
        case 0x5c: /* FCVTAU */
        case 0x7a: /* FCVTPU */
        case 0x7b: /* FCVTZU */
        {
            /* Unsigned conversion, passing a shift operand of zero. */
            TCGv_i32 tcg_shift = tcg_const_i32(0);
            gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
            tcg_temp_free_i32(tcg_shift);
            break;
        }
        default:
            g_assert_not_reached();
        }

        write_fp_sreg(s, rd, tcg_rd);
        tcg_temp_free_i32(tcg_rd);
        tcg_temp_free_i32(tcg_rn);
    }

    if (is_fcvt) {
        /* Second set_rmode call, pairing with the one made above. */
        gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
        tcg_temp_free_i32(tcg_rmode);
        tcg_temp_free_ptr(tcg_fpstatus);
    }
}
9330
/*
 * SSRA expander for 8-bit lanes held in a 64-bit value: shift @a right
 * arithmetically by @shift per lane, then add the result into @d.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
9336
/*
 * SSRA expander for 16-bit lanes held in a 64-bit value: shift @a right
 * arithmetically by @shift per lane, then add the result into @d.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
9342
/*
 * SSRA expander for one 32-bit element: d += (a >> shift), using an
 * arithmetic shift.  Note that @a is overwritten with the shifted value.
 */
static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
9348
/*
 * SSRA expander for one 64-bit element: d += (a >> shift), using an
 * arithmetic shift.  Note that @a is overwritten with the shifted value.
 */
static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
9354
/*
 * SSRA expander for host vector operands with elements of size @vece:
 * arithmetic shift of @a right by @sh, then add into @d.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
9360
/*
 * USRA expander for 8-bit lanes held in a 64-bit value: shift @a right
 * logically by @shift per lane, then add the result into @d.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
9366
/*
 * USRA expander for 16-bit lanes held in a 64-bit value: shift @a right
 * logically by @shift per lane, then add the result into @d.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
9372
/*
 * USRA expander for one 32-bit element: d += (a >> shift), using a
 * logical shift.  Note that @a is overwritten with the shifted value.
 */
static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
9378
/*
 * USRA expander for one 64-bit element: d += (a >> shift), using a
 * logical shift.  Note that @a is overwritten with the shifted value.
 */
static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
9384
/*
 * USRA expander for host vector operands with elements of size @vece:
 * logical shift of @a right by @sh, then add into @d.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
9390
9391 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9392 {
9393     uint64_t mask = dup_const(MO_8, 0xff >> shift);
9394     TCGv_i64 t = tcg_temp_new_i64();
9395
9396     tcg_gen_shri_i64(t, a, shift);
9397     tcg_gen_andi_i64(t, t, mask);
9398     tcg_gen_andi_i64(d, d, ~mask);
9399     tcg_gen_or_i64(d, d, t);
9400     tcg_temp_free_i64(t);
9401 }
9402
9403 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9404 {
9405     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
9406     TCGv_i64 t = tcg_temp_new_i64();
9407
9408     tcg_gen_shri_i64(t, a, shift);
9409     tcg_gen_andi_i64(t, t, mask);
9410     tcg_gen_andi_i64(d, d, ~mask);
9411     tcg_gen_or_i64(d, d, t);
9412     tcg_temp_free_i64(t);
9413 }
9414
/*
 * SRI expander for one 32-bit element: shift @a right by @shift and
 * deposit it into the low (32 - shift) bits of @d, leaving the top
 * @shift bits of @d unchanged.  Note that @a is overwritten with the
 * shifted value.
 */
static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}
9420
/*
 * SRI expander for one 64-bit element: shift @a right by @shift and
 * deposit it into the low (64 - shift) bits of @d, leaving the top
 * @shift bits of @d unchanged.  Note that @a is overwritten with the
 * shifted value.
 */
static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}
9426
9427 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
9428 {
9429     uint64_t mask = (2ull << ((8 << vece) - 1)) - 1;
9430     TCGv_vec t = tcg_temp_new_vec_matching(d);
9431     TCGv_vec m = tcg_temp_new_vec_matching(d);
9432
9433     tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh));
9434     tcg_gen_shri_vec(vece, t, a, sh);
9435     tcg_gen_and_vec(vece, d, d, m);
9436     tcg_gen_or_vec(vece, d, d, t);
9437
9438     tcg_temp_free_vec(t);
9439     tcg_temp_free_vec(m);
9440 }
9441
9442 /* SSHR[RA]/USHR[RA] - Vector shift right (optional rounding/accumulate) */
9443 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
9444                                  int immh, int immb, int opcode, int rn, int rd)
9445 {
9446     static const GVecGen2i ssra_op[4] = {
9447         { .fni8 = gen_ssra8_i64,
9448           .fniv = gen_ssra_vec,
9449           .load_dest = true,
9450           .opc = INDEX_op_sari_vec,
9451           .vece = MO_8 },
9452         { .fni8 = gen_ssra16_i64,
9453           .fniv = gen_ssra_vec,
9454           .load_dest = true,
9455           .opc = INDEX_op_sari_vec,
9456           .vece = MO_16 },
9457         { .fni4 = gen_ssra32_i32,
9458           .fniv = gen_ssra_vec,
9459           .load_dest = true,
9460           .opc = INDEX_op_sari_vec,
9461           .vece = MO_32 },
9462         { .fni8 = gen_ssra64_i64,
9463           .fniv = gen_ssra_vec,
9464           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9465           .load_dest = true,
9466           .opc = INDEX_op_sari_vec,
9467           .vece = MO_64 },
9468     };
9469     static const GVecGen2i usra_op[4] = {
9470         { .fni8 = gen_usra8_i64,
9471           .fniv = gen_usra_vec,
9472           .load_dest = true,
9473           .opc = INDEX_op_shri_vec,
9474           .vece = MO_8, },
9475         { .fni8 = gen_usra16_i64,
9476           .fniv = gen_usra_vec,
9477           .load_dest = true,
9478           .opc = INDEX_op_shri_vec,
9479           .vece = MO_16, },
9480         { .fni4 = gen_usra32_i32,
9481           .fniv = gen_usra_vec,
9482           .load_dest = true,
9483           .opc = INDEX_op_shri_vec,
9484           .vece = MO_32, },
9485         { .fni8 = gen_usra64_i64,
9486           .fniv = gen_usra_vec,
9487           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9488           .load_dest = true,
9489           .opc = INDEX_op_shri_vec,
9490           .vece = MO_64, },
9491     };
9492     static const GVecGen2i sri_op[4] = {
9493         { .fni8 = gen_shr8_ins_i64,
9494           .fniv = gen_shr_ins_vec,
9495           .load_dest = true,
9496           .opc = INDEX_op_shri_vec,
9497           .vece = MO_8 },
9498         { .fni8 = gen_shr16_ins_i64,
9499           .fniv = gen_shr_ins_vec,
9500           .load_dest = true,
9501           .opc = INDEX_op_shri_vec,
9502           .vece = MO_16 },
9503         { .fni4 = gen_shr32_ins_i32,
9504           .fniv = gen_shr_ins_vec,
9505           .load_dest = true,
9506           .opc = INDEX_op_shri_vec,
9507           .vece = MO_32 },
9508         { .fni8 = gen_shr64_ins_i64,
9509           .fniv = gen_shr_ins_vec,
9510           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9511           .load_dest = true,
9512           .opc = INDEX_op_shri_vec,
9513           .vece = MO_64 },
9514     };
9515
9516     int size = 32 - clz32(immh) - 1;
9517     int immhb = immh << 3 | immb;
9518     int shift = 2 * (8 << size) - immhb;
9519     bool accumulate = false;
9520     int dsize = is_q ? 128 : 64;
9521     int esize = 8 << size;
9522     int elements = dsize/esize;
9523     TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
9524     TCGv_i64 tcg_rn = new_tmp_a64(s);
9525     TCGv_i64 tcg_rd = new_tmp_a64(s);
9526     TCGv_i64 tcg_round;
9527     uint64_t round_const;
9528     int i;
9529
9530     if (extract32(immh, 3, 1) && !is_q) {
9531         unallocated_encoding(s);
9532         return;
9533     }
9534     tcg_debug_assert(size <= 3);
9535
9536     if (!fp_access_check(s)) {
9537         return;
9538     }
9539
9540     switch (opcode) {
9541     case 0x02: /* SSRA / USRA (accumulate) */
9542         if (is_u) {
9543             /* Shift count same as element size produces zero to add.  */
9544             if (shift == 8 << size) {
9545                 goto done;
9546             }
9547             gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
9548         } else {
9549             /* Shift count same as element size produces all sign to add.  */
9550             if (shift == 8 << size) {
9551                 shift -= 1;
9552             }
9553             gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
9554         }
9555         return;
9556     case 0x08: /* SRI */
9557         /* Shift count same as element size is valid but does nothing.  */
9558         if (shift == 8 << size) {
9559             goto done;
9560         }
9561         gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
9562         return;
9563
9564     case 0x00: /* SSHR / USHR */
9565         if (is_u) {
9566             if (shift == 8 << size) {
9567                 /* Shift count the same size as element size produces zero.  */
9568                 tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
9569                                    is_q ? 16 : 8, vec_full_reg_size(s), 0);
9570             } else {
9571                 gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
9572             }
9573         } else {
9574             /* Shift count the same size as element size produces all sign.  */
9575             if (shift == 8 << size) {
9576                 shift -= 1;
9577             }
9578             gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
9579         }
9580         return;
9581
9582     case 0x04: /* SRSHR / URSHR (rounding) */
9583         break;
9584     case 0x06: /* SRSRA / URSRA (accum + rounding) */
9585         accumulate = true;
9586         break;
9587     default:
9588         g_assert_not_reached();
9589     }
9590
9591     round_const = 1ULL << (shift - 1);
9592     tcg_round = tcg_const_i64(round_const);
9593
9594     for (i = 0; i < elements; i++) {
9595         read_vec_element(s, tcg_rn, rn, i, memop);
9596         if (accumulate) {
9597             read_vec_element(s, tcg_rd, rd, i, memop);
9598         }
9599
9600         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9601                                 accumulate, is_u, size, shift);
9602
9603         write_vec_element(s, tcg_rd, rd, i, size);
9604     }
9605     tcg_temp_free_i64(tcg_round);
9606
9607  done:
9608     clear_vec_high(s, is_q, rd);
9609 }
9610
9611 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9612 {
9613     uint64_t mask = dup_const(MO_8, 0xff << shift);
9614     TCGv_i64 t = tcg_temp_new_i64();
9615
9616     tcg_gen_shli_i64(t, a, shift);
9617     tcg_gen_andi_i64(t, t, mask);
9618     tcg_gen_andi_i64(d, d, ~mask);
9619     tcg_gen_or_i64(d, d, t);
9620     tcg_temp_free_i64(t);
9621 }
9622
9623 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9624 {
9625     uint64_t mask = dup_const(MO_16, 0xffff << shift);
9626     TCGv_i64 t = tcg_temp_new_i64();
9627
9628     tcg_gen_shli_i64(t, a, shift);
9629     tcg_gen_andi_i64(t, t, mask);
9630     tcg_gen_andi_i64(d, d, ~mask);
9631     tcg_gen_or_i64(d, d, t);
9632     tcg_temp_free_i64(t);
9633 }
9634
/*
 * SLI expander for one 32-bit element: deposit the low (32 - shift)
 * bits of @a into @d starting at bit @shift, leaving the low @shift
 * bits of @d unchanged.
 */
static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}
9639
/*
 * SLI expander for one 64-bit element: deposit the low (64 - shift)
 * bits of @a into @d starting at bit @shift, leaving the low @shift
 * bits of @d unchanged.
 */
static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}
9644
9645 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
9646 {
9647     uint64_t mask = (1ull << sh) - 1;
9648     TCGv_vec t = tcg_temp_new_vec_matching(d);
9649     TCGv_vec m = tcg_temp_new_vec_matching(d);
9650
9651     tcg_gen_dupi_vec(vece, m, mask);
9652     tcg_gen_shli_vec(vece, t, a, sh);
9653     tcg_gen_and_vec(vece, d, d, m);
9654     tcg_gen_or_vec(vece, d, d, t);
9655
9656     tcg_temp_free_vec(t);
9657     tcg_temp_free_vec(m);
9658 }
9659
9660 /* SHL/SLI - Vector shift left */
9661 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
9662                                  int immh, int immb, int opcode, int rn, int rd)
9663 {
9664     static const GVecGen2i shi_op[4] = {
9665         { .fni8 = gen_shl8_ins_i64,
9666           .fniv = gen_shl_ins_vec,
9667           .opc = INDEX_op_shli_vec,
9668           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9669           .load_dest = true,
9670           .vece = MO_8 },
9671         { .fni8 = gen_shl16_ins_i64,
9672           .fniv = gen_shl_ins_vec,
9673           .opc = INDEX_op_shli_vec,
9674           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9675           .load_dest = true,
9676           .vece = MO_16 },
9677         { .fni4 = gen_shl32_ins_i32,
9678           .fniv = gen_shl_ins_vec,
9679           .opc = INDEX_op_shli_vec,
9680           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9681           .load_dest = true,
9682           .vece = MO_32 },
9683         { .fni8 = gen_shl64_ins_i64,
9684           .fniv = gen_shl_ins_vec,
9685           .opc = INDEX_op_shli_vec,
9686           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9687           .load_dest = true,
9688           .vece = MO_64 },
9689     };
9690     int size = 32 - clz32(immh) - 1;
9691     int immhb = immh << 3 | immb;
9692     int shift = immhb - (8 << size);
9693
9694     if (extract32(immh, 3, 1) && !is_q) {
9695         unallocated_encoding(s);
9696         return;
9697     }
9698
9699     if (size > 3 && !is_q) {
9700         unallocated_encoding(s);
9701         return;
9702     }
9703
9704     if (!fp_access_check(s)) {
9705         return;
9706     }
9707
9708     if (insert) {
9709         gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]);
9710     } else {
9711         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
9712     }
9713 }
9714
9715 /* USHLL/SHLL - Vector shift left with widening */
9716 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
9717                                  int immh, int immb, int opcode, int rn, int rd)
9718 {
9719     int size = 32 - clz32(immh) - 1;
9720     int immhb = immh << 3 | immb;
9721     int shift = immhb - (8 << size);
9722     int dsize = 64;
9723     int esize = 8 << size;
9724     int elements = dsize/esize;
9725     TCGv_i64 tcg_rn = new_tmp_a64(s);
9726     TCGv_i64 tcg_rd = new_tmp_a64(s);
9727     int i;
9728
9729     if (size >= 3) {
9730         unallocated_encoding(s);
9731         return;
9732     }
9733
9734     if (!fp_access_check(s)) {
9735         return;
9736     }
9737
9738     /* For the LL variants the store is larger than the load,
9739      * so if rd == rn we would overwrite parts of our input.
9740      * So load everything right now and use shifts in the main loop.
9741      */
9742     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
9743
9744     for (i = 0; i < elements; i++) {
9745         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
9746         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
9747         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
9748         write_vec_element(s, tcg_rd, rd, i, size + 1);
9749     }
9750 }
9751
9752 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
9753 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
9754                                  int immh, int immb, int opcode, int rn, int rd)
9755 {
9756     int immhb = immh << 3 | immb;
9757     int size = 32 - clz32(immh) - 1;
9758     int dsize = 64;
9759     int esize = 8 << size;
9760     int elements = dsize/esize;
9761     int shift = (2 * esize) - immhb;
9762     bool round = extract32(opcode, 0, 1);
9763     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
9764     TCGv_i64 tcg_round;
9765     int i;
9766
9767     if (extract32(immh, 3, 1)) {
9768         unallocated_encoding(s);
9769         return;
9770     }
9771
9772     if (!fp_access_check(s)) {
9773         return;
9774     }
9775
9776     tcg_rn = tcg_temp_new_i64();
9777     tcg_rd = tcg_temp_new_i64();
9778     tcg_final = tcg_temp_new_i64();
9779     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
9780
9781     if (round) {
9782         uint64_t round_const = 1ULL << (shift - 1);
9783         tcg_round = tcg_const_i64(round_const);
9784     } else {
9785         tcg_round = NULL;
9786     }
9787
9788     for (i = 0; i < elements; i++) {
9789         read_vec_element(s, tcg_rn, rn, i, size+1);
9790         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9791                                 false, true, size+1, shift);
9792
9793         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
9794     }
9795
9796     if (!is_q) {
9797         write_vec_element(s, tcg_final, rd, 0, MO_64);
9798     } else {
9799         write_vec_element(s, tcg_final, rd, 1, MO_64);
9800     }
9801     if (round) {
9802         tcg_temp_free_i64(tcg_round);
9803     }
9804     tcg_temp_free_i64(tcg_rn);
9805     tcg_temp_free_i64(tcg_rd);
9806     tcg_temp_free_i64(tcg_final);
9807
9808     clear_vec_high(s, is_q, rd);
9809 }
9810
9811
9812 /* AdvSIMD shift by immediate
9813  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
9814  * +---+---+---+-------------+------+------+--------+---+------+------+
9815  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
9816  * +---+---+---+-------------+------+------+--------+---+------+------+
9817  */
9818 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
9819 {
9820     int rd = extract32(insn, 0, 5);
9821     int rn = extract32(insn, 5, 5);
9822     int opcode = extract32(insn, 11, 5);
9823     int immb = extract32(insn, 16, 3);
9824     int immh = extract32(insn, 19, 4);
9825     bool is_u = extract32(insn, 29, 1);
9826     bool is_q = extract32(insn, 30, 1);
9827
9828     switch (opcode) {
9829     case 0x08: /* SRI */
9830         if (!is_u) {
9831             unallocated_encoding(s);
9832             return;
9833         }
9834         /* fall through */
9835     case 0x00: /* SSHR / USHR */
9836     case 0x02: /* SSRA / USRA (accumulate) */
9837     case 0x04: /* SRSHR / URSHR (rounding) */
9838     case 0x06: /* SRSRA / URSRA (accum + rounding) */
9839         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
9840         break;
9841     case 0x0a: /* SHL / SLI */
9842         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
9843         break;
9844     case 0x10: /* SHRN */
9845     case 0x11: /* RSHRN / SQRSHRUN */
9846         if (is_u) {
9847             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
9848                                    opcode, rn, rd);
9849         } else {
9850             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
9851         }
9852         break;
9853     case 0x12: /* SQSHRN / UQSHRN */
9854     case 0x13: /* SQRSHRN / UQRSHRN */
9855         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
9856                                opcode, rn, rd);
9857         break;
9858     case 0x14: /* SSHLL / USHLL */
9859         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
9860         break;
9861     case 0x1c: /* SCVTF / UCVTF */
9862         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
9863                                      opcode, rn, rd);
9864         break;
9865     case 0xc: /* SQSHLU */
9866         if (!is_u) {
9867             unallocated_encoding(s);
9868             return;
9869         }
9870         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
9871         break;
9872     case 0xe: /* SQSHL, UQSHL */
9873         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
9874         break;
9875     case 0x1f: /* FCVTZS/ FCVTZU */
9876         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
9877         return;
9878     default:
9879         unallocated_encoding(s);
9880         return;
9881     }
9882 }
9883
9884 /* Generate code to do a "long" addition or subtraction, ie one done in
9885  * TCGv_i64 on vector lanes twice the width specified by size.
9886  */
9887 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
9888                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
9889 {
9890     static NeonGenTwo64OpFn * const fns[3][2] = {
9891         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
9892         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
9893         { tcg_gen_add_i64, tcg_gen_sub_i64 },
9894     };
9895     NeonGenTwo64OpFn *genfn;
9896     assert(size < 3);
9897
9898     genfn = fns[size][is_sub];
9899     genfn(tcg_res, tcg_op1, tcg_op2);
9900 }
9901
9902 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
9903                                 int opcode, int rd, int rn, int rm)
9904 {
9905     /* 3-reg-different widening insns: 64 x 64 -> 128 */
9906     TCGv_i64 tcg_res[2];
9907     int pass, accop;
9908
9909     tcg_res[0] = tcg_temp_new_i64();
9910     tcg_res[1] = tcg_temp_new_i64();
9911
9912     /* Does this op do an adding accumulate, a subtracting accumulate,
9913      * or no accumulate at all?
9914      */
9915     switch (opcode) {
9916     case 5:
9917     case 8:
9918     case 9:
9919         accop = 1;
9920         break;
9921     case 10:
9922     case 11:
9923         accop = -1;
9924         break;
9925     default:
9926         accop = 0;
9927         break;
9928     }
9929
9930     if (accop != 0) {
9931         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
9932         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
9933     }
9934
9935     /* size == 2 means two 32x32->64 operations; this is worth special
9936      * casing because we can generally handle it inline.
9937      */
9938     if (size == 2) {
9939         for (pass = 0; pass < 2; pass++) {
9940             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9941             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9942             TCGv_i64 tcg_passres;
9943             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
9944
9945             int elt = pass + is_q * 2;
9946
9947             read_vec_element(s, tcg_op1, rn, elt, memop);
9948             read_vec_element(s, tcg_op2, rm, elt, memop);
9949
9950             if (accop == 0) {
9951                 tcg_passres = tcg_res[pass];
9952             } else {
9953                 tcg_passres = tcg_temp_new_i64();
9954             }
9955
9956             switch (opcode) {
9957             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9958                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
9959                 break;
9960             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9961                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
9962                 break;
9963             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9964             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9965             {
9966                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
9967                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
9968
9969                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
9970                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
9971                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
9972                                     tcg_passres,
9973                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
9974                 tcg_temp_free_i64(tcg_tmp1);
9975                 tcg_temp_free_i64(tcg_tmp2);
9976                 break;
9977             }
9978             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9979             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9980             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
9981                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
9982                 break;
9983             case 9: /* SQDMLAL, SQDMLAL2 */
9984             case 11: /* SQDMLSL, SQDMLSL2 */
9985             case 13: /* SQDMULL, SQDMULL2 */
9986                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
9987                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
9988                                                   tcg_passres, tcg_passres);
9989                 break;
9990             default:
9991                 g_assert_not_reached();
9992             }
9993
9994             if (opcode == 9 || opcode == 11) {
9995                 /* saturating accumulate ops */
9996                 if (accop < 0) {
9997                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
9998                 }
9999                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
10000                                                   tcg_res[pass], tcg_passres);
10001             } else if (accop > 0) {
10002                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10003             } else if (accop < 0) {
10004                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
10005             }
10006
10007             if (accop != 0) {
10008                 tcg_temp_free_i64(tcg_passres);
10009             }
10010
10011             tcg_temp_free_i64(tcg_op1);
10012             tcg_temp_free_i64(tcg_op2);
10013         }
10014     } else {
10015         /* size 0 or 1, generally helper functions */
10016         for (pass = 0; pass < 2; pass++) {
10017             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10018             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10019             TCGv_i64 tcg_passres;
10020             int elt = pass + is_q * 2;
10021
10022             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
10023             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
10024
10025             if (accop == 0) {
10026                 tcg_passres = tcg_res[pass];
10027             } else {
10028                 tcg_passres = tcg_temp_new_i64();
10029             }
10030
10031             switch (opcode) {
10032             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10033             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10034             {
10035                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
10036                 static NeonGenWidenFn * const widenfns[2][2] = {
10037                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10038                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10039                 };
10040                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
10041
10042                 widenfn(tcg_op2_64, tcg_op2);
10043                 widenfn(tcg_passres, tcg_op1);
10044                 gen_neon_addl(size, (opcode == 2), tcg_passres,
10045                               tcg_passres, tcg_op2_64);
10046                 tcg_temp_free_i64(tcg_op2_64);
10047                 break;
10048             }
10049             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10050             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10051                 if (size == 0) {
10052                     if (is_u) {
10053                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
10054                     } else {
10055                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
10056                     }
10057                 } else {
10058                     if (is_u) {
10059                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
10060                     } else {
10061                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
10062                     }
10063                 }
10064                 break;
10065             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10066             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10067             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
10068                 if (size == 0) {
10069                     if (is_u) {
10070                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
10071                     } else {
10072                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
10073                     }
10074                 } else {
10075                     if (is_u) {
10076                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
10077                     } else {
10078                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10079                     }
10080                 }
10081                 break;
10082             case 9: /* SQDMLAL, SQDMLAL2 */
10083             case 11: /* SQDMLSL, SQDMLSL2 */
10084             case 13: /* SQDMULL, SQDMULL2 */
10085                 assert(size == 1);
10086                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
10087                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
10088                                                   tcg_passres, tcg_passres);
10089                 break;
10090             case 14: /* PMULL */
10091                 assert(size == 0);
10092                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
10093                 break;
10094             default:
10095                 g_assert_not_reached();
10096             }
10097             tcg_temp_free_i32(tcg_op1);
10098             tcg_temp_free_i32(tcg_op2);
10099
10100             if (accop != 0) {
10101                 if (opcode == 9 || opcode == 11) {
10102                     /* saturating accumulate ops */
10103                     if (accop < 0) {
10104                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
10105                     }
10106                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
10107                                                       tcg_res[pass],
10108                                                       tcg_passres);
10109                 } else {
10110                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
10111                                   tcg_res[pass], tcg_passres);
10112                 }
10113                 tcg_temp_free_i64(tcg_passres);
10114             }
10115         }
10116     }
10117
10118     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
10119     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
10120     tcg_temp_free_i64(tcg_res[0]);
10121     tcg_temp_free_i64(tcg_res[1]);
10122 }
10123
10124 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
10125                             int opcode, int rd, int rn, int rm)
10126 {
10127     TCGv_i64 tcg_res[2];
10128     int part = is_q ? 2 : 0;
10129     int pass;
10130
10131     for (pass = 0; pass < 2; pass++) {
10132         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10133         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10134         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
10135         static NeonGenWidenFn * const widenfns[3][2] = {
10136             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
10137             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
10138             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
10139         };
10140         NeonGenWidenFn *widenfn = widenfns[size][is_u];
10141
10142         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10143         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
10144         widenfn(tcg_op2_wide, tcg_op2);
10145         tcg_temp_free_i32(tcg_op2);
10146         tcg_res[pass] = tcg_temp_new_i64();
10147         gen_neon_addl(size, (opcode == 3),
10148                       tcg_res[pass], tcg_op1, tcg_op2_wide);
10149         tcg_temp_free_i64(tcg_op1);
10150         tcg_temp_free_i64(tcg_op2_wide);
10151     }
10152
10153     for (pass = 0; pass < 2; pass++) {
10154         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10155         tcg_temp_free_i64(tcg_res[pass]);
10156     }
10157 }
10158
10159 static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
10160 {
10161     tcg_gen_addi_i64(in, in, 1U << 31);
10162     tcg_gen_extrh_i64_i32(res, in);
10163 }
10164
10165 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
10166                                  int opcode, int rd, int rn, int rm)
10167 {
10168     TCGv_i32 tcg_res[2];
10169     int part = is_q ? 2 : 0;
10170     int pass;
10171
10172     for (pass = 0; pass < 2; pass++) {
10173         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10174         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10175         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
10176         static NeonGenNarrowFn * const narrowfns[3][2] = {
10177             { gen_helper_neon_narrow_high_u8,
10178               gen_helper_neon_narrow_round_high_u8 },
10179             { gen_helper_neon_narrow_high_u16,
10180               gen_helper_neon_narrow_round_high_u16 },
10181             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
10182         };
10183         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
10184
10185         read_vec_element(s, tcg_op1, rn, pass, MO_64);
10186         read_vec_element(s, tcg_op2, rm, pass, MO_64);
10187
10188         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
10189
10190         tcg_temp_free_i64(tcg_op1);
10191         tcg_temp_free_i64(tcg_op2);
10192
10193         tcg_res[pass] = tcg_temp_new_i32();
10194         gennarrow(tcg_res[pass], tcg_wideres);
10195         tcg_temp_free_i64(tcg_wideres);
10196     }
10197
10198     for (pass = 0; pass < 2; pass++) {
10199         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
10200         tcg_temp_free_i32(tcg_res[pass]);
10201     }
10202     clear_vec_high(s, is_q, rd);
10203 }
10204
10205 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
10206 {
10207     /* PMULL of 64 x 64 -> 128 is an odd special case because it
10208      * is the only three-reg-diff instruction which produces a
10209      * 128-bit wide result from a single operation. However since
10210      * it's possible to calculate the two halves more or less
10211      * separately we just use two helper calls.
10212      */
10213     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10214     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10215     TCGv_i64 tcg_res = tcg_temp_new_i64();
10216
10217     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
10218     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
10219     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
10220     write_vec_element(s, tcg_res, rd, 0, MO_64);
10221     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
10222     write_vec_element(s, tcg_res, rd, 1, MO_64);
10223
10224     tcg_temp_free_i64(tcg_op1);
10225     tcg_temp_free_i64(tcg_op2);
10226     tcg_temp_free_i64(tcg_res);
10227 }
10228
10229 /* AdvSIMD three different
10230  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
10231  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10232  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
10233  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
10234  */
10235 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
10236 {
10237     /* Instructions in this group fall into three basic classes
10238      * (in each case with the operation working on each element in
10239      * the input vectors):
10240      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
10241      *     128 bit input)
10242      * (2) wide 64 x 128 -> 128
10243      * (3) narrowing 128 x 128 -> 64
10244      * Here we do initial decode, catch unallocated cases and
10245      * dispatch to separate functions for each class.
10246      */
10247     int is_q = extract32(insn, 30, 1);
10248     int is_u = extract32(insn, 29, 1);
10249     int size = extract32(insn, 22, 2);
10250     int opcode = extract32(insn, 12, 4);
10251     int rm = extract32(insn, 16, 5);
10252     int rn = extract32(insn, 5, 5);
10253     int rd = extract32(insn, 0, 5);
10254
10255     switch (opcode) {
10256     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
10257     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
10258         /* 64 x 128 -> 128 */
10259         if (size == 3) {
10260             unallocated_encoding(s);
10261             return;
10262         }
10263         if (!fp_access_check(s)) {
10264             return;
10265         }
10266         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
10267         break;
10268     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
10269     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
10270         /* 128 x 128 -> 64 */
10271         if (size == 3) {
10272             unallocated_encoding(s);
10273             return;
10274         }
10275         if (!fp_access_check(s)) {
10276             return;
10277         }
10278         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
10279         break;
10280     case 14: /* PMULL, PMULL2 */
10281         if (is_u || size == 1 || size == 2) {
10282             unallocated_encoding(s);
10283             return;
10284         }
10285         if (size == 3) {
10286             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
10287                 unallocated_encoding(s);
10288                 return;
10289             }
10290             if (!fp_access_check(s)) {
10291                 return;
10292             }
10293             handle_pmull_64(s, is_q, rd, rn, rm);
10294             return;
10295         }
10296         goto is_widening;
10297     case 9: /* SQDMLAL, SQDMLAL2 */
10298     case 11: /* SQDMLSL, SQDMLSL2 */
10299     case 13: /* SQDMULL, SQDMULL2 */
10300         if (is_u || size == 0) {
10301             unallocated_encoding(s);
10302             return;
10303         }
10304         /* fall through */
10305     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
10306     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
10307     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
10308     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
10309     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
10310     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
10311     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
10312         /* 64 x 64 -> 128 */
10313         if (size == 3) {
10314             unallocated_encoding(s);
10315             return;
10316         }
10317     is_widening:
10318         if (!fp_access_check(s)) {
10319             return;
10320         }
10321
10322         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
10323         break;
10324     default:
10325         /* opcode 15 not allocated */
10326         unallocated_encoding(s);
10327         break;
10328     }
10329 }
10330
/* BSL (bitwise select) on 64-bit values.
 *
 * Computes rd = rm ^ ((rn ^ rm) & rd): every result bit is taken from rn
 * where the incoming rd bit is 1 and from rm where it is 0.
 * Note that rn is used as scratch space and is overwritten.
 */
static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_xor_i64(rn, rn, rm);
    tcg_gen_and_i64(rn, rn, rd);
    tcg_gen_xor_i64(rd, rm, rn);
}
10337
/* BIT (bitwise insert if true) on 64-bit values.
 *
 * Computes rd = rd ^ ((rn ^ rd) & rm): bits of rn replace the bits of rd
 * wherever the corresponding rm bit is 1; other rd bits are unchanged.
 * Note that rn is used as scratch space and is overwritten.
 */
static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_xor_i64(rn, rn, rd);
    tcg_gen_and_i64(rn, rn, rm);
    tcg_gen_xor_i64(rd, rd, rn);
}
10344
/* BIF (bitwise insert if false) on 64-bit values.
 *
 * Computes rd = rd ^ ((rn ^ rd) & ~rm) (andc is "and with complement"):
 * bits of rn replace the bits of rd wherever the corresponding rm bit is
 * 0; other rd bits are unchanged.
 * Note that rn is used as scratch space and is overwritten.
 */
static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
{
    tcg_gen_xor_i64(rn, rn, rd);
    tcg_gen_andc_i64(rn, rn, rm);
    tcg_gen_xor_i64(rd, rd, rn);
}
10351
/* Host-vector form of BSL (bitwise select).
 *
 * Computes rd = rm ^ ((rn ^ rm) & rd): every result bit is taken from rn
 * where the incoming rd bit is 1 and from rm where it is 0.  vece is
 * passed through unchanged to the TCG vector ops.
 * Note that rn is used as scratch space and is overwritten.
 */
static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
{
    tcg_gen_xor_vec(vece, rn, rn, rm);
    tcg_gen_and_vec(vece, rn, rn, rd);
    tcg_gen_xor_vec(vece, rd, rm, rn);
}
10358
/* Host-vector form of BIT (bitwise insert if true).
 *
 * Computes rd = rd ^ ((rn ^ rd) & rm): bits of rn replace the bits of rd
 * wherever the corresponding rm bit is 1.  vece is passed through
 * unchanged to the TCG vector ops.
 * Note that rn is used as scratch space and is overwritten.
 */
static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
{
    tcg_gen_xor_vec(vece, rn, rn, rd);
    tcg_gen_and_vec(vece, rn, rn, rm);
    tcg_gen_xor_vec(vece, rd, rd, rn);
}
10365
/* Host-vector form of BIF (bitwise insert if false).
 *
 * Computes rd = rd ^ ((rn ^ rd) & ~rm) (andc is "and with complement"):
 * bits of rn replace the bits of rd wherever the corresponding rm bit is
 * 0.  vece is passed through unchanged to the TCG vector ops.
 * Note that rn is used as scratch space and is overwritten.
 */
static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
{
    tcg_gen_xor_vec(vece, rn, rn, rd);
    tcg_gen_andc_vec(vece, rn, rn, rm);
    tcg_gen_xor_vec(vece, rd, rd, rn);
}
10372
10373 /* Logic op (opcode == 3) subgroup of C3.6.16. */
10374 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
10375 {
10376     static const GVecGen3 bsl_op = {
10377         .fni8 = gen_bsl_i64,
10378         .fniv = gen_bsl_vec,
10379         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10380         .load_dest = true
10381     };
10382     static const GVecGen3 bit_op = {
10383         .fni8 = gen_bit_i64,
10384         .fniv = gen_bit_vec,
10385         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10386         .load_dest = true
10387     };
10388     static const GVecGen3 bif_op = {
10389         .fni8 = gen_bif_i64,
10390         .fniv = gen_bif_vec,
10391         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10392         .load_dest = true
10393     };
10394
10395     int rd = extract32(insn, 0, 5);
10396     int rn = extract32(insn, 5, 5);
10397     int rm = extract32(insn, 16, 5);
10398     int size = extract32(insn, 22, 2);
10399     bool is_u = extract32(insn, 29, 1);
10400     bool is_q = extract32(insn, 30, 1);
10401
10402     if (!fp_access_check(s)) {
10403         return;
10404     }
10405
10406     switch (size + 4 * is_u) {
10407     case 0: /* AND */
10408         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
10409         return;
10410     case 1: /* BIC */
10411         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
10412         return;
10413     case 2: /* ORR */
10414         if (rn == rm) { /* MOV */
10415             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0);
10416         } else {
10417             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
10418         }
10419         return;
10420     case 3: /* ORN */
10421         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
10422         return;
10423     case 4: /* EOR */
10424         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
10425         return;
10426
10427     case 5: /* BSL bitwise select */
10428         gen_gvec_op3(s, is_q, rd, rn, rm, &bsl_op);
10429         return;
10430     case 6: /* BIT, bitwise insert if true */
10431         gen_gvec_op3(s, is_q, rd, rn, rm, &bit_op);
10432         return;
10433     case 7: /* BIF, bitwise insert if false */
10434         gen_gvec_op3(s, is_q, rd, rn, rm, &bif_op);
10435         return;
10436
10437     default:
10438         g_assert_not_reached();
10439     }
10440 }
10441
10442 /* Pairwise op subgroup of C3.6.16.
10443  *
10444  * This is called directly or via the handle_3same_float for float pairwise
10445  * operations where the opcode and size are calculated differently.
10446  */
10447 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
10448                                    int size, int rn, int rm, int rd)
10449 {
10450     TCGv_ptr fpst;
10451     int pass;
10452
10453     /* Floating point operations need fpst */
10454     if (opcode >= 0x58) {
10455         fpst = get_fpstatus_ptr(false);
10456     } else {
10457         fpst = NULL;
10458     }
10459
10460     if (!fp_access_check(s)) {
10461         return;
10462     }
10463
10464     /* These operations work on the concatenated rm:rn, with each pair of
10465      * adjacent elements being operated on to produce an element in the result.
10466      */
10467     if (size == 3) {
10468         TCGv_i64 tcg_res[2];
10469
10470         for (pass = 0; pass < 2; pass++) {
10471             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10472             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10473             int passreg = (pass == 0) ? rn : rm;
10474
10475             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
10476             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
10477             tcg_res[pass] = tcg_temp_new_i64();
10478
10479             switch (opcode) {
10480             case 0x17: /* ADDP */
10481                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
10482                 break;
10483             case 0x58: /* FMAXNMP */
10484                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10485                 break;
10486             case 0x5a: /* FADDP */
10487                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10488                 break;
10489             case 0x5e: /* FMAXP */
10490                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10491                 break;
10492             case 0x78: /* FMINNMP */
10493                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10494                 break;
10495             case 0x7e: /* FMINP */
10496                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10497                 break;
10498             default:
10499                 g_assert_not_reached();
10500             }
10501
10502             tcg_temp_free_i64(tcg_op1);
10503             tcg_temp_free_i64(tcg_op2);
10504         }
10505
10506         for (pass = 0; pass < 2; pass++) {
10507             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10508             tcg_temp_free_i64(tcg_res[pass]);
10509         }
10510     } else {
10511         int maxpass = is_q ? 4 : 2;
10512         TCGv_i32 tcg_res[4];
10513
10514         for (pass = 0; pass < maxpass; pass++) {
10515             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10516             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10517             NeonGenTwoOpFn *genfn = NULL;
10518             int passreg = pass < (maxpass / 2) ? rn : rm;
10519             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10520
10521             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10522             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10523             tcg_res[pass] = tcg_temp_new_i32();
10524
10525             switch (opcode) {
10526             case 0x17: /* ADDP */
10527             {
10528                 static NeonGenTwoOpFn * const fns[3] = {
10529                     gen_helper_neon_padd_u8,
10530                     gen_helper_neon_padd_u16,
10531                     tcg_gen_add_i32,
10532                 };
10533                 genfn = fns[size];
10534                 break;
10535             }
10536             case 0x14: /* SMAXP, UMAXP */
10537             {
10538                 static NeonGenTwoOpFn * const fns[3][2] = {
10539                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10540                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10541                     { tcg_gen_smax_i32, tcg_gen_umax_i32 },
10542                 };
10543                 genfn = fns[size][u];
10544                 break;
10545             }
10546             case 0x15: /* SMINP, UMINP */
10547             {
10548                 static NeonGenTwoOpFn * const fns[3][2] = {
10549                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10550                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10551                     { tcg_gen_smin_i32, tcg_gen_umin_i32 },
10552                 };
10553                 genfn = fns[size][u];
10554                 break;
10555             }
10556             /* The FP operations are all on single floats (32 bit) */
10557             case 0x58: /* FMAXNMP */
10558                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10559                 break;
10560             case 0x5a: /* FADDP */
10561                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10562                 break;
10563             case 0x5e: /* FMAXP */
10564                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10565                 break;
10566             case 0x78: /* FMINNMP */
10567                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10568                 break;
10569             case 0x7e: /* FMINP */
10570                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10571                 break;
10572             default:
10573                 g_assert_not_reached();
10574             }
10575
10576             /* FP ops called directly, otherwise call now */
10577             if (genfn) {
10578                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10579             }
10580
10581             tcg_temp_free_i32(tcg_op1);
10582             tcg_temp_free_i32(tcg_op2);
10583         }
10584
10585         for (pass = 0; pass < maxpass; pass++) {
10586             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10587             tcg_temp_free_i32(tcg_res[pass]);
10588         }
10589         clear_vec_high(s, is_q, rd);
10590     }
10591
10592     if (fpst) {
10593         tcg_temp_free_ptr(fpst);
10594     }
10595 }
10596
10597 /* Floating point op subgroup of C3.6.16. */
10598 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10599 {
10600     /* For floating point ops, the U, size[1] and opcode bits
10601      * together indicate the operation. size[0] indicates single
10602      * or double.
10603      */
10604     int fpopcode = extract32(insn, 11, 5)
10605         | (extract32(insn, 23, 1) << 5)
10606         | (extract32(insn, 29, 1) << 6);
10607     int is_q = extract32(insn, 30, 1);
10608     int size = extract32(insn, 22, 1);
10609     int rm = extract32(insn, 16, 5);
10610     int rn = extract32(insn, 5, 5);
10611     int rd = extract32(insn, 0, 5);
10612
10613     int datasize = is_q ? 128 : 64;
10614     int esize = 32 << size;
10615     int elements = datasize / esize;
10616
10617     if (size == 1 && !is_q) {
10618         unallocated_encoding(s);
10619         return;
10620     }
10621
10622     switch (fpopcode) {
10623     case 0x58: /* FMAXNMP */
10624     case 0x5a: /* FADDP */
10625     case 0x5e: /* FMAXP */
10626     case 0x78: /* FMINNMP */
10627     case 0x7e: /* FMINP */
10628         if (size && !is_q) {
10629             unallocated_encoding(s);
10630             return;
10631         }
10632         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10633                                rn, rm, rd);
10634         return;
10635     case 0x1b: /* FMULX */
10636     case 0x1f: /* FRECPS */
10637     case 0x3f: /* FRSQRTS */
10638     case 0x5d: /* FACGE */
10639     case 0x7d: /* FACGT */
10640     case 0x19: /* FMLA */
10641     case 0x39: /* FMLS */
10642     case 0x18: /* FMAXNM */
10643     case 0x1a: /* FADD */
10644     case 0x1c: /* FCMEQ */
10645     case 0x1e: /* FMAX */
10646     case 0x38: /* FMINNM */
10647     case 0x3a: /* FSUB */
10648     case 0x3e: /* FMIN */
10649     case 0x5b: /* FMUL */
10650     case 0x5c: /* FCMGE */
10651     case 0x5f: /* FDIV */
10652     case 0x7a: /* FABD */
10653     case 0x7c: /* FCMGT */
10654         if (!fp_access_check(s)) {
10655             return;
10656         }
10657
10658         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
10659         return;
10660     default:
10661         unallocated_encoding(s);
10662         return;
10663     }
10664 }
10665
/* Multiply-accumulate for MO_8 elements (the .fni4 entry of mla_op[0]):
 * d = d + a * b, using the Neon u8 multiply and add helpers.
 * Note that a is overwritten with the product.
 */
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}
10671
/* Multiply-accumulate for MO_16 elements (the .fni4 entry of mla_op[1]):
 * d = d + a * b, using the Neon u16 multiply and add helpers.
 * Note that a is overwritten with the product.
 */
static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}
10677
/* Multiply-accumulate for MO_32 elements (the .fni4 entry of mla_op[2]):
 * d = d + a * b on a single 32-bit value.
 * Note that a is overwritten with the product.
 */
static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}
10683
/* Multiply-accumulate for MO_64 elements (the .fni8 entry of mla_op[3]):
 * d = d + a * b on a single 64-bit value.
 * Note that a is overwritten with the product.
 */
static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}
10689
/* Multiply-accumulate on host vectors (the .fniv entry of every mla_op[]
 * element): d = d + a * b with elements of size vece.
 * Note that a is overwritten with the product.
 */
static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}
10695
/* Multiply-subtract for MO_8 elements (the .fni4 entry of mls_op[0]):
 * d = d - a * b, using the Neon u8 multiply and subtract helpers.
 * Note that a is overwritten with the product.
 */
static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}
10701
/* Multiply-subtract for MO_16 elements (the .fni4 entry of mls_op[1]):
 * d = d - a * b, using the Neon u16 multiply and subtract helpers.
 * Note that a is overwritten with the product.
 */
static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}
10707
/* Multiply-subtract for MO_32 elements (the .fni4 entry of mls_op[2]):
 * d = d - a * b on a single 32-bit value.
 * Note that a is overwritten with the product.
 */
static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}
10713
/* Multiply-subtract for MO_64 elements (the .fni8 entry of mls_op[3]):
 * d = d - a * b on a single 64-bit value.
 * Note that a is overwritten with the product.
 */
static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}
10719
/* Multiply-subtract on host vectors (the .fniv entry of every mls_op[]
 * element): d = d - a * b with elements of size vece.
 * Note that a is overwritten with the product.
 */
static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}
10725
/* Integer op subgroup of C3.6.16.
 *
 * Decode and emit code for one AdvSIMD "three same" integer instruction.
 * The function works in four stages:
 *  1. reject size/opcode combinations that are unallocated;
 *  2. perform the FP/SIMD access check;
 *  3. expand the operations that have a gvec implementation and return;
 *  4. expand everything else element by element: two 64-bit passes when
 *     size == 3, otherwise two or four 32-bit passes with a helper picked
 *     by [size][u].
 */
static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
{
    /* gvec expanders, indexed by the instruction's size field */
    static const GVecGen3 cmtst_op[4] = {
        { .fni4 = gen_helper_neon_tst_u8,
          .fniv = gen_cmtst_vec,
          .vece = MO_8 },
        { .fni4 = gen_helper_neon_tst_u16,
          .fniv = gen_cmtst_vec,
          .vece = MO_16 },
        { .fni4 = gen_cmtst_i32,
          .fniv = gen_cmtst_vec,
          .vece = MO_32 },
        { .fni8 = gen_cmtst_i64,
          .fniv = gen_cmtst_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .vece = MO_64 },
    };
    /* MLA: load_dest is set because the destination is also an input */
    static const GVecGen3 mla_op[4] = {
        { .fni4 = gen_mla8_i32,
          .fniv = gen_mla_vec,
          .opc = INDEX_op_mul_vec,
          .load_dest = true,
          .vece = MO_8 },
        { .fni4 = gen_mla16_i32,
          .fniv = gen_mla_vec,
          .opc = INDEX_op_mul_vec,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_mla32_i32,
          .fniv = gen_mla_vec,
          .opc = INDEX_op_mul_vec,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_mla64_i64,
          .fniv = gen_mla_vec,
          .opc = INDEX_op_mul_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .vece = MO_64 },
    };
    /* MLS: same layout as mla_op, with the subtracting generators */
    static const GVecGen3 mls_op[4] = {
        { .fni4 = gen_mls8_i32,
          .fniv = gen_mls_vec,
          .opc = INDEX_op_mul_vec,
          .load_dest = true,
          .vece = MO_8 },
        { .fni4 = gen_mls16_i32,
          .fniv = gen_mls_vec,
          .opc = INDEX_op_mul_vec,
          .load_dest = true,
          .vece = MO_16 },
        { .fni4 = gen_mls32_i32,
          .fniv = gen_mls_vec,
          .opc = INDEX_op_mul_vec,
          .load_dest = true,
          .vece = MO_32 },
        { .fni8 = gen_mls64_i64,
          .fniv = gen_mls_vec,
          .opc = INDEX_op_mul_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .vece = MO_64 },
    };

    int is_q = extract32(insn, 30, 1);
    int u = extract32(insn, 29, 1);
    int size = extract32(insn, 22, 2);
    int opcode = extract32(insn, 11, 5);
    int rm = extract32(insn, 16, 5);
    int rn = extract32(insn, 5, 5);
    int rd = extract32(insn, 0, 5);
    int pass;
    TCGCond cond;

    /* Stage 1: reject unallocated size/opcode combinations */
    switch (opcode) {
    case 0x13: /* MUL, PMUL */
        /* PMUL (u set) is only accepted for size 0 */
        if (u && size != 0) {
            unallocated_encoding(s);
            return;
        }
        /* fall through */
    case 0x0: /* SHADD, UHADD */
    case 0x2: /* SRHADD, URHADD */
    case 0x4: /* SHSUB, UHSUB */
    case 0xc: /* SMAX, UMAX */
    case 0xd: /* SMIN, UMIN */
    case 0xe: /* SABD, UABD */
    case 0xf: /* SABA, UABA */
    case 0x12: /* MLA, MLS */
        /* These ops are rejected for size 3 */
        if (size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    case 0x16: /* SQDMULH, SQRDMULH */
        /* Only sizes 1 and 2 are accepted */
        if (size == 0 || size == 3) {
            unallocated_encoding(s);
            return;
        }
        break;
    default:
        /* Remaining ops accept size 3 only together with Q */
        if (size == 3 && !is_q) {
            unallocated_encoding(s);
            return;
        }
        break;
    }

    /* Stage 2: access check, after all unallocated-encoding checks */
    if (!fp_access_check(s)) {
        return;
    }

    /* Stage 3: operations expanded through gvec; these return directly */
    switch (opcode) {
    case 0x10: /* ADD, SUB */
        if (u) {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
        } else {
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
        }
        return;
    case 0x13: /* MUL, PMUL */
        if (!u) { /* MUL */
            gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
            return;
        }
        /* PMUL is handled by the per-element loop below */
        break;
    case 0x12: /* MLA, MLS */
        if (u) {
            gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
        } else {
            gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
        }
        return;
    case 0x11:
        if (!u) { /* CMTST */
            gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
            return;
        }
        /* else CMEQ */
        cond = TCG_COND_EQ;
        goto do_gvec_cmp;
    case 0x06: /* CMGT, CMHI */
        cond = u ? TCG_COND_GTU : TCG_COND_GT;
        goto do_gvec_cmp;
    case 0x07: /* CMGE, CMHS */
        cond = u ? TCG_COND_GEU : TCG_COND_GE;
    do_gvec_cmp:
        /* Shared tail of the three compare cases above */
        tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
                         vec_full_reg_offset(s, rn),
                         vec_full_reg_offset(s, rm),
                         is_q ? 16 : 8, vec_full_reg_size(s));
        return;
    }

    /* Stage 4: per-element expansion for everything not handled above */
    if (size == 3) {
        /* 64-bit elements: stage 1 guarantees Q is set, so two passes */
        assert(is_q);
        for (pass = 0; pass < 2; pass++) {
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        }
    } else {
        /* Process the vector 32 bits at a time: 4 passes for Q, else 2 */
        for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();
            /* Exactly one of these is set by the switch below;
             * genenvfn is for helpers that also take cpu_env.
             */
            NeonGenTwoOpFn *genfn = NULL;
            NeonGenTwoOpEnvFn *genenvfn = NULL;

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            /* The helper tables are indexed as fns[size][u] */
            switch (opcode) {
            case 0x0: /* SHADD, UHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
                    { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
                    { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x1: /* SQADD, UQADD */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
                    { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
                    { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0x2: /* SRHADD, URHADD */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
                    { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
                    { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x4: /* SHSUB, UHSUB */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
                    { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
                    { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x5: /* SQSUB, UQSUB */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
                    { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
                    { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0x8: /* SSHL, USHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
                    { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
                    { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x9: /* SQSHL, UQSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
                    { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
                    { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xa: /* SRSHL, URSHL */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
                    { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
                    { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xb: /* SQRSHL, UQRSHL */
            {
                static NeonGenTwoOpEnvFn * const fns[3][2] = {
                    { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
                    { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
                    { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
                };
                genenvfn = fns[size][u];
                break;
            }
            case 0xc: /* SMAX, UMAX */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
                    { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
                    { tcg_gen_smax_i32, tcg_gen_umax_i32 },
                };
                genfn = fns[size][u];
                break;
            }

            case 0xd: /* SMIN, UMIN */
            {
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
                    { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
                    { tcg_gen_smin_i32, tcg_gen_umin_i32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0xe: /* SABD, UABD */
            case 0xf: /* SABA, UABA */
            {
                /* SABA/UABA use the same absolute-difference helper;
                 * the accumulate step is added after the call below.
                 */
                static NeonGenTwoOpFn * const fns[3][2] = {
                    { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
                    { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
                    { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
                };
                genfn = fns[size][u];
                break;
            }
            case 0x13: /* MUL, PMUL */
                /* MUL was expanded via gvec above, so only PMUL gets here */
                assert(u); /* PMUL */
                assert(size == 0);
                genfn = gen_helper_neon_mul_p8;
                break;
            case 0x16: /* SQDMULH, SQRDMULH */
            {
                /* Indexed by [size - 1][u]; u selects the rounding form */
                static NeonGenTwoOpEnvFn * const fns[2][2] = {
                    { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
                    { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
                };
                assert(size == 1 || size == 2);
                genenvfn = fns[size - 1][u];
                break;
            }
            default:
                g_assert_not_reached();
            }

            if (genenvfn) {
                genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
            } else {
                genfn(tcg_res, tcg_op1, tcg_op2);
            }

            if (opcode == 0xf) {
                /* SABA, UABA: accumulating ops */
                static NeonGenTwoOpFn * const fns[3] = {
                    gen_helper_neon_add_u8,
                    gen_helper_neon_add_u16,
                    tcg_gen_add_i32,
                };

                /* tcg_op1 is reused to hold the current destination value */
                read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
                fns[size](tcg_res, tcg_op1, tcg_res);
            }

            write_vec_element_i32(s, tcg_res, rd, pass, MO_32);

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }
    clear_vec_high(s, is_q, rd);
}
11079
11080 /* AdvSIMD three same
11081  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
11082  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11083  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
11084  * +---+---+---+-----------+------+---+------+--------+---+------+------+
11085  */
11086 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
11087 {
11088     int opcode = extract32(insn, 11, 5);
11089
11090     switch (opcode) {
11091     case 0x3: /* logic ops */
11092         disas_simd_3same_logic(s, insn);
11093         break;
11094     case 0x17: /* ADDP */
11095     case 0x14: /* SMAXP, UMAXP */
11096     case 0x15: /* SMINP, UMINP */
11097     {
11098         /* Pairwise operations */
11099         int is_q = extract32(insn, 30, 1);
11100         int u = extract32(insn, 29, 1);
11101         int size = extract32(insn, 22, 2);
11102         int rm = extract32(insn, 16, 5);
11103         int rn = extract32(insn, 5, 5);
11104         int rd = extract32(insn, 0, 5);
11105         if (opcode == 0x17) {
11106             if (u || (size == 3 && !is_q)) {
11107                 unallocated_encoding(s);
11108                 return;
11109             }
11110         } else {
11111             if (size == 3) {
11112                 unallocated_encoding(s);
11113                 return;
11114             }
11115         }
11116         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
11117         break;
11118     }
11119     case 0x18 ... 0x31:
11120         /* floating point ops, sz[1] and U are part of opcode */
11121         disas_simd_3same_float(s, insn);
11122         break;
11123     default:
11124         disas_simd_3same_int(s, insn);
11125         break;
11126     }
11127 }
11128
11129 /*
11130  * Advanced SIMD three same (ARMv8.2 FP16 variants)
11131  *
11132  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
11133  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11134  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
11135  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
11136  *
11137  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
11138  * (register), FACGE, FABD, FCMGT (register) and FACGT.
11139  *
11140  */
11141 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
11142 {
11143     int opcode, fpopcode;
11144     int is_q, u, a, rm, rn, rd;
11145     int datasize, elements;
11146     int pass;
11147     TCGv_ptr fpst;
11148     bool pairwise = false;
11149
11150     if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
11151         unallocated_encoding(s);
11152         return;
11153     }
11154
11155     if (!fp_access_check(s)) {
11156         return;
11157     }
11158
11159     /* For these floating point ops, the U, a and opcode bits
11160      * together indicate the operation.
11161      */
11162     opcode = extract32(insn, 11, 3);
11163     u = extract32(insn, 29, 1);
11164     a = extract32(insn, 23, 1);
11165     is_q = extract32(insn, 30, 1);
11166     rm = extract32(insn, 16, 5);
11167     rn = extract32(insn, 5, 5);
11168     rd = extract32(insn, 0, 5);
11169
11170     fpopcode = opcode | (a << 3) |  (u << 4);
11171     datasize = is_q ? 128 : 64;
11172     elements = datasize / 16;
11173
11174     switch (fpopcode) {
11175     case 0x10: /* FMAXNMP */
11176     case 0x12: /* FADDP */
11177     case 0x16: /* FMAXP */
11178     case 0x18: /* FMINNMP */
11179     case 0x1e: /* FMINP */
11180         pairwise = true;
11181         break;
11182     }
11183
11184     fpst = get_fpstatus_ptr(true);
11185
11186     if (pairwise) {
11187         int maxpass = is_q ? 8 : 4;
11188         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11189         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11190         TCGv_i32 tcg_res[8];
11191
11192         for (pass = 0; pass < maxpass; pass++) {
11193             int passreg = pass < (maxpass / 2) ? rn : rm;
11194             int passelt = (pass << 1) & (maxpass - 1);
11195
11196             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
11197             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
11198             tcg_res[pass] = tcg_temp_new_i32();
11199
11200             switch (fpopcode) {
11201             case 0x10: /* FMAXNMP */
11202                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
11203                                            fpst);
11204                 break;
11205             case 0x12: /* FADDP */
11206                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11207                 break;
11208             case 0x16: /* FMAXP */
11209                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11210                 break;
11211             case 0x18: /* FMINNMP */
11212                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
11213                                            fpst);
11214                 break;
11215             case 0x1e: /* FMINP */
11216                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
11217                 break;
11218             default:
11219                 g_assert_not_reached();
11220             }
11221         }
11222
11223         for (pass = 0; pass < maxpass; pass++) {
11224             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
11225             tcg_temp_free_i32(tcg_res[pass]);
11226         }
11227
11228         tcg_temp_free_i32(tcg_op1);
11229         tcg_temp_free_i32(tcg_op2);
11230
11231     } else {
11232         for (pass = 0; pass < elements; pass++) {
11233             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
11234             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
11235             TCGv_i32 tcg_res = tcg_temp_new_i32();
11236
11237             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
11238             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
11239
11240             switch (fpopcode) {
11241             case 0x0: /* FMAXNM */
11242                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11243                 break;
11244             case 0x1: /* FMLA */
11245                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11246                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11247                                            fpst);
11248                 break;
11249             case 0x2: /* FADD */
11250                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
11251                 break;
11252             case 0x3: /* FMULX */
11253                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
11254                 break;
11255             case 0x4: /* FCMEQ */
11256                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11257                 break;
11258             case 0x6: /* FMAX */
11259                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
11260                 break;
11261             case 0x7: /* FRECPS */
11262                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11263                 break;
11264             case 0x8: /* FMINNM */
11265                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
11266                 break;
11267             case 0x9: /* FMLS */
11268                 /* As usual for ARM, separate negation for fused multiply-add */
11269                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
11270                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11271                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
11272                                            fpst);
11273                 break;
11274             case 0xa: /* FSUB */
11275                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11276                 break;
11277             case 0xe: /* FMIN */
11278                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
11279                 break;
11280             case 0xf: /* FRSQRTS */
11281                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11282                 break;
11283             case 0x13: /* FMUL */
11284                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
11285                 break;
11286             case 0x14: /* FCMGE */
11287                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11288                 break;
11289             case 0x15: /* FACGE */
11290                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11291                 break;
11292             case 0x17: /* FDIV */
11293                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
11294                 break;
11295             case 0x1a: /* FABD */
11296                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
11297                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
11298                 break;
11299             case 0x1c: /* FCMGT */
11300                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11301                 break;
11302             case 0x1d: /* FACGT */
11303                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
11304                 break;
11305             default:
11306                 fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
11307                         __func__, insn, fpopcode, s->pc);
11308                 g_assert_not_reached();
11309             }
11310
11311             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11312             tcg_temp_free_i32(tcg_res);
11313             tcg_temp_free_i32(tcg_op1);
11314             tcg_temp_free_i32(tcg_op2);
11315         }
11316     }
11317
11318     tcg_temp_free_ptr(fpst);
11319
11320     clear_vec_high(s, is_q, rd);
11321 }
11322
11323 /* AdvSIMD three same extra
11324  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
11325  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11326  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
11327  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
11328  */
11329 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
11330 {
11331     int rd = extract32(insn, 0, 5);
11332     int rn = extract32(insn, 5, 5);
11333     int opcode = extract32(insn, 11, 4);
11334     int rm = extract32(insn, 16, 5);
11335     int size = extract32(insn, 22, 2);
11336     bool u = extract32(insn, 29, 1);
11337     bool is_q = extract32(insn, 30, 1);
11338     int feature, rot;
11339
11340     switch (u * 16 + opcode) {
11341     case 0x10: /* SQRDMLAH (vector) */
11342     case 0x11: /* SQRDMLSH (vector) */
11343         if (size != 1 && size != 2) {
11344             unallocated_encoding(s);
11345             return;
11346         }
11347         feature = ARM_FEATURE_V8_RDM;
11348         break;
11349     case 0x02: /* SDOT (vector) */
11350     case 0x12: /* UDOT (vector) */
11351         if (size != MO_32) {
11352             unallocated_encoding(s);
11353             return;
11354         }
11355         feature = ARM_FEATURE_V8_DOTPROD;
11356         break;
11357     case 0x8: /* FCMLA, #0 */
11358     case 0x9: /* FCMLA, #90 */
11359     case 0xa: /* FCMLA, #180 */
11360     case 0xb: /* FCMLA, #270 */
11361     case 0xc: /* FCADD, #90 */
11362     case 0xe: /* FCADD, #270 */
11363         if (size == 0
11364             || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))
11365             || (size == 3 && !is_q)) {
11366             unallocated_encoding(s);
11367             return;
11368         }
11369         feature = ARM_FEATURE_V8_FCMA;
11370         break;
11371     default:
11372         unallocated_encoding(s);
11373         return;
11374     }
11375     if (!arm_dc_feature(s, feature)) {
11376         unallocated_encoding(s);
11377         return;
11378     }
11379     if (!fp_access_check(s)) {
11380         return;
11381     }
11382
11383     switch (opcode) {
11384     case 0x0: /* SQRDMLAH (vector) */
11385         switch (size) {
11386         case 1:
11387             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
11388             break;
11389         case 2:
11390             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
11391             break;
11392         default:
11393             g_assert_not_reached();
11394         }
11395         return;
11396
11397     case 0x1: /* SQRDMLSH (vector) */
11398         switch (size) {
11399         case 1:
11400             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
11401             break;
11402         case 2:
11403             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
11404             break;
11405         default:
11406             g_assert_not_reached();
11407         }
11408         return;
11409
11410     case 0x2: /* SDOT / UDOT */
11411         gen_gvec_op3_ool(s, is_q, rd, rn, rm, 0,
11412                          u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b);
11413         return;
11414
11415     case 0x8: /* FCMLA, #0 */
11416     case 0x9: /* FCMLA, #90 */
11417     case 0xa: /* FCMLA, #180 */
11418     case 0xb: /* FCMLA, #270 */
11419         rot = extract32(opcode, 0, 2);
11420         switch (size) {
11421         case 1:
11422             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, true, rot,
11423                               gen_helper_gvec_fcmlah);
11424             break;
11425         case 2:
11426             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11427                               gen_helper_gvec_fcmlas);
11428             break;
11429         case 3:
11430             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, false, rot,
11431                               gen_helper_gvec_fcmlad);
11432             break;
11433         default:
11434             g_assert_not_reached();
11435         }
11436         return;
11437
11438     case 0xc: /* FCADD, #90 */
11439     case 0xe: /* FCADD, #270 */
11440         rot = extract32(opcode, 1, 1);
11441         switch (size) {
11442         case 1:
11443             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11444                               gen_helper_gvec_fcaddh);
11445             break;
11446         case 2:
11447             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11448                               gen_helper_gvec_fcadds);
11449             break;
11450         case 3:
11451             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
11452                               gen_helper_gvec_fcaddd);
11453             break;
11454         default:
11455             g_assert_not_reached();
11456         }
11457         return;
11458
11459     default:
11460         g_assert_not_reached();
11461     }
11462 }
11463
11464 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
11465                                   int size, int rn, int rd)
11466 {
11467     /* Handle 2-reg-misc ops which are widening (so each size element
11468      * in the source becomes a 2*size element in the destination.
11469      * The only instruction like this is FCVTL.
11470      */
11471     int pass;
11472
11473     if (size == 3) {
11474         /* 32 -> 64 bit fp conversion */
11475         TCGv_i64 tcg_res[2];
11476         int srcelt = is_q ? 2 : 0;
11477
11478         for (pass = 0; pass < 2; pass++) {
11479             TCGv_i32 tcg_op = tcg_temp_new_i32();
11480             tcg_res[pass] = tcg_temp_new_i64();
11481
11482             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
11483             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
11484             tcg_temp_free_i32(tcg_op);
11485         }
11486         for (pass = 0; pass < 2; pass++) {
11487             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11488             tcg_temp_free_i64(tcg_res[pass]);
11489         }
11490     } else {
11491         /* 16 -> 32 bit fp conversion */
11492         int srcelt = is_q ? 4 : 0;
11493         TCGv_i32 tcg_res[4];
11494         TCGv_ptr fpst = get_fpstatus_ptr(false);
11495         TCGv_i32 ahp = get_ahp_flag();
11496
11497         for (pass = 0; pass < 4; pass++) {
11498             tcg_res[pass] = tcg_temp_new_i32();
11499
11500             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
11501             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
11502                                            fpst, ahp);
11503         }
11504         for (pass = 0; pass < 4; pass++) {
11505             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
11506             tcg_temp_free_i32(tcg_res[pass]);
11507         }
11508
11509         tcg_temp_free_ptr(fpst);
11510         tcg_temp_free_i32(ahp);
11511     }
11512 }
11513
11514 static void handle_rev(DisasContext *s, int opcode, bool u,
11515                        bool is_q, int size, int rn, int rd)
11516 {
11517     int op = (opcode << 1) | u;
11518     int opsz = op + size;
11519     int grp_size = 3 - opsz;
11520     int dsize = is_q ? 128 : 64;
11521     int i;
11522
11523     if (opsz >= 3) {
11524         unallocated_encoding(s);
11525         return;
11526     }
11527
11528     if (!fp_access_check(s)) {
11529         return;
11530     }
11531
11532     if (size == 0) {
11533         /* Special case bytes, use bswap op on each group of elements */
11534         int groups = dsize / (8 << grp_size);
11535
11536         for (i = 0; i < groups; i++) {
11537             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
11538
11539             read_vec_element(s, tcg_tmp, rn, i, grp_size);
11540             switch (grp_size) {
11541             case MO_16:
11542                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
11543                 break;
11544             case MO_32:
11545                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
11546                 break;
11547             case MO_64:
11548                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11549                 break;
11550             default:
11551                 g_assert_not_reached();
11552             }
11553             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11554             tcg_temp_free_i64(tcg_tmp);
11555         }
11556         clear_vec_high(s, is_q, rd);
11557     } else {
11558         int revmask = (1 << grp_size) - 1;
11559         int esize = 8 << size;
11560         int elements = dsize / esize;
11561         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11562         TCGv_i64 tcg_rd = tcg_const_i64(0);
11563         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
11564
11565         for (i = 0; i < elements; i++) {
11566             int e_rev = (i & 0xf) ^ revmask;
11567             int off = e_rev * esize;
11568             read_vec_element(s, tcg_rn, rn, i, size);
11569             if (off >= 64) {
11570                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
11571                                     tcg_rn, off - 64, esize);
11572             } else {
11573                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
11574             }
11575         }
11576         write_vec_element(s, tcg_rd, rd, 0, MO_64);
11577         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
11578
11579         tcg_temp_free_i64(tcg_rd_hi);
11580         tcg_temp_free_i64(tcg_rd);
11581         tcg_temp_free_i64(tcg_rn);
11582     }
11583 }
11584
11585 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11586                                   bool is_q, int size, int rn, int rd)
11587 {
11588     /* Implement the pairwise operations from 2-misc:
11589      * SADDLP, UADDLP, SADALP, UADALP.
11590      * These all add pairs of elements in the input to produce a
11591      * double-width result element in the output (possibly accumulating).
11592      */
11593     bool accum = (opcode == 0x6);
11594     int maxpass = is_q ? 2 : 1;
11595     int pass;
11596     TCGv_i64 tcg_res[2];
11597
11598     if (size == 2) {
11599         /* 32 + 32 -> 64 op */
11600         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
11601
11602         for (pass = 0; pass < maxpass; pass++) {
11603             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11604             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11605
11606             tcg_res[pass] = tcg_temp_new_i64();
11607
11608             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11609             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11610             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11611             if (accum) {
11612                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11613                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11614             }
11615
11616             tcg_temp_free_i64(tcg_op1);
11617             tcg_temp_free_i64(tcg_op2);
11618         }
11619     } else {
11620         for (pass = 0; pass < maxpass; pass++) {
11621             TCGv_i64 tcg_op = tcg_temp_new_i64();
11622             NeonGenOneOpFn *genfn;
11623             static NeonGenOneOpFn * const fns[2][2] = {
11624                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11625                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11626             };
11627
11628             genfn = fns[size][u];
11629
11630             tcg_res[pass] = tcg_temp_new_i64();
11631
11632             read_vec_element(s, tcg_op, rn, pass, MO_64);
11633             genfn(tcg_res[pass], tcg_op);
11634
11635             if (accum) {
11636                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11637                 if (size == 0) {
11638                     gen_helper_neon_addl_u16(tcg_res[pass],
11639                                              tcg_res[pass], tcg_op);
11640                 } else {
11641                     gen_helper_neon_addl_u32(tcg_res[pass],
11642                                              tcg_res[pass], tcg_op);
11643                 }
11644             }
11645             tcg_temp_free_i64(tcg_op);
11646         }
11647     }
11648     if (!is_q) {
11649         tcg_res[1] = tcg_const_i64(0);
11650     }
11651     for (pass = 0; pass < 2; pass++) {
11652         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11653         tcg_temp_free_i64(tcg_res[pass]);
11654     }
11655 }
11656
11657 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11658 {
11659     /* Implement SHLL and SHLL2 */
11660     int pass;
11661     int part = is_q ? 2 : 0;
11662     TCGv_i64 tcg_res[2];
11663
11664     for (pass = 0; pass < 2; pass++) {
11665         static NeonGenWidenFn * const widenfns[3] = {
11666             gen_helper_neon_widen_u8,
11667             gen_helper_neon_widen_u16,
11668             tcg_gen_extu_i32_i64,
11669         };
11670         NeonGenWidenFn *widenfn = widenfns[size];
11671         TCGv_i32 tcg_op = tcg_temp_new_i32();
11672
11673         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11674         tcg_res[pass] = tcg_temp_new_i64();
11675         widenfn(tcg_res[pass], tcg_op);
11676         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11677
11678         tcg_temp_free_i32(tcg_op);
11679     }
11680
11681     for (pass = 0; pass < 2; pass++) {
11682         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11683         tcg_temp_free_i64(tcg_res[pass]);
11684     }
11685 }
11686
11687 /* AdvSIMD two reg misc
11688  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11689  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11690  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11691  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11692  */
11693 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11694 {
11695     int size = extract32(insn, 22, 2);
11696     int opcode = extract32(insn, 12, 5);
11697     bool u = extract32(insn, 29, 1);
11698     bool is_q = extract32(insn, 30, 1);
11699     int rn = extract32(insn, 5, 5);
11700     int rd = extract32(insn, 0, 5);
11701     bool need_fpstatus = false;
11702     bool need_rmode = false;
11703     int rmode = -1;
11704     TCGv_i32 tcg_rmode;
11705     TCGv_ptr tcg_fpstatus;
11706
11707     switch (opcode) {
11708     case 0x0: /* REV64, REV32 */
11709     case 0x1: /* REV16 */
11710         handle_rev(s, opcode, u, is_q, size, rn, rd);
11711         return;
11712     case 0x5: /* CNT, NOT, RBIT */
11713         if (u && size == 0) {
11714             /* NOT */
11715             break;
11716         } else if (u && size == 1) {
11717             /* RBIT */
11718             break;
11719         } else if (!u && size == 0) {
11720             /* CNT */
11721             break;
11722         }
11723         unallocated_encoding(s);
11724         return;
11725     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11726     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11727         if (size == 3) {
11728             unallocated_encoding(s);
11729             return;
11730         }
11731         if (!fp_access_check(s)) {
11732             return;
11733         }
11734
11735         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11736         return;
11737     case 0x4: /* CLS, CLZ */
11738         if (size == 3) {
11739             unallocated_encoding(s);
11740             return;
11741         }
11742         break;
11743     case 0x2: /* SADDLP, UADDLP */
11744     case 0x6: /* SADALP, UADALP */
11745         if (size == 3) {
11746             unallocated_encoding(s);
11747             return;
11748         }
11749         if (!fp_access_check(s)) {
11750             return;
11751         }
11752         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11753         return;
11754     case 0x13: /* SHLL, SHLL2 */
11755         if (u == 0 || size == 3) {
11756             unallocated_encoding(s);
11757             return;
11758         }
11759         if (!fp_access_check(s)) {
11760             return;
11761         }
11762         handle_shll(s, is_q, size, rn, rd);
11763         return;
11764     case 0xa: /* CMLT */
11765         if (u == 1) {
11766             unallocated_encoding(s);
11767             return;
11768         }
11769         /* fall through */
11770     case 0x8: /* CMGT, CMGE */
11771     case 0x9: /* CMEQ, CMLE */
11772     case 0xb: /* ABS, NEG */
11773         if (size == 3 && !is_q) {
11774             unallocated_encoding(s);
11775             return;
11776         }
11777         break;
11778     case 0x3: /* SUQADD, USQADD */
11779         if (size == 3 && !is_q) {
11780             unallocated_encoding(s);
11781             return;
11782         }
11783         if (!fp_access_check(s)) {
11784             return;
11785         }
11786         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
11787         return;
11788     case 0x7: /* SQABS, SQNEG */
11789         if (size == 3 && !is_q) {
11790             unallocated_encoding(s);
11791             return;
11792         }
11793         break;
11794     case 0xc ... 0xf:
11795     case 0x16 ... 0x1d:
11796     case 0x1f:
11797     {
11798         /* Floating point: U, size[1] and opcode indicate operation;
11799          * size[0] indicates single or double precision.
11800          */
11801         int is_double = extract32(size, 0, 1);
11802         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
11803         size = is_double ? 3 : 2;
11804         switch (opcode) {
11805         case 0x2f: /* FABS */
11806         case 0x6f: /* FNEG */
11807             if (size == 3 && !is_q) {
11808                 unallocated_encoding(s);
11809                 return;
11810             }
11811             break;
11812         case 0x1d: /* SCVTF */
11813         case 0x5d: /* UCVTF */
11814         {
11815             bool is_signed = (opcode == 0x1d) ? true : false;
11816             int elements = is_double ? 2 : is_q ? 4 : 2;
11817             if (is_double && !is_q) {
11818                 unallocated_encoding(s);
11819                 return;
11820             }
11821             if (!fp_access_check(s)) {
11822                 return;
11823             }
11824             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
11825             return;
11826         }
11827         case 0x2c: /* FCMGT (zero) */
11828         case 0x2d: /* FCMEQ (zero) */
11829         case 0x2e: /* FCMLT (zero) */
11830         case 0x6c: /* FCMGE (zero) */
11831         case 0x6d: /* FCMLE (zero) */
11832             if (size == 3 && !is_q) {
11833                 unallocated_encoding(s);
11834                 return;
11835             }
11836             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
11837             return;
11838         case 0x7f: /* FSQRT */
11839             if (size == 3 && !is_q) {
11840                 unallocated_encoding(s);
11841                 return;
11842             }
11843             break;
11844         case 0x1a: /* FCVTNS */
11845         case 0x1b: /* FCVTMS */
11846         case 0x3a: /* FCVTPS */
11847         case 0x3b: /* FCVTZS */
11848         case 0x5a: /* FCVTNU */
11849         case 0x5b: /* FCVTMU */
11850         case 0x7a: /* FCVTPU */
11851         case 0x7b: /* FCVTZU */
11852             need_fpstatus = true;
11853             need_rmode = true;
11854             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11855             if (size == 3 && !is_q) {
11856                 unallocated_encoding(s);
11857                 return;
11858             }
11859             break;
11860         case 0x5c: /* FCVTAU */
11861         case 0x1c: /* FCVTAS */
11862             need_fpstatus = true;
11863             need_rmode = true;
11864             rmode = FPROUNDING_TIEAWAY;
11865             if (size == 3 && !is_q) {
11866                 unallocated_encoding(s);
11867                 return;
11868             }
11869             break;
11870         case 0x3c: /* URECPE */
11871             if (size == 3) {
11872                 unallocated_encoding(s);
11873                 return;
11874             }
11875             /* fall through */
11876         case 0x3d: /* FRECPE */
11877         case 0x7d: /* FRSQRTE */
11878             if (size == 3 && !is_q) {
11879                 unallocated_encoding(s);
11880                 return;
11881             }
11882             if (!fp_access_check(s)) {
11883                 return;
11884             }
11885             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
11886             return;
11887         case 0x56: /* FCVTXN, FCVTXN2 */
11888             if (size == 2) {
11889                 unallocated_encoding(s);
11890                 return;
11891             }
11892             /* fall through */
11893         case 0x16: /* FCVTN, FCVTN2 */
11894             /* handle_2misc_narrow does a 2*size -> size operation, but these
11895              * instructions encode the source size rather than dest size.
11896              */
11897             if (!fp_access_check(s)) {
11898                 return;
11899             }
11900             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11901             return;
11902         case 0x17: /* FCVTL, FCVTL2 */
11903             if (!fp_access_check(s)) {
11904                 return;
11905             }
11906             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
11907             return;
11908         case 0x18: /* FRINTN */
11909         case 0x19: /* FRINTM */
11910         case 0x38: /* FRINTP */
11911         case 0x39: /* FRINTZ */
11912             need_rmode = true;
11913             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11914             /* fall through */
11915         case 0x59: /* FRINTX */
11916         case 0x79: /* FRINTI */
11917             need_fpstatus = true;
11918             if (size == 3 && !is_q) {
11919                 unallocated_encoding(s);
11920                 return;
11921             }
11922             break;
11923         case 0x58: /* FRINTA */
11924             need_rmode = true;
11925             rmode = FPROUNDING_TIEAWAY;
11926             need_fpstatus = true;
11927             if (size == 3 && !is_q) {
11928                 unallocated_encoding(s);
11929                 return;
11930             }
11931             break;
11932         case 0x7c: /* URSQRTE */
11933             if (size == 3) {
11934                 unallocated_encoding(s);
11935                 return;
11936             }
11937             need_fpstatus = true;
11938             break;
11939         default:
11940             unallocated_encoding(s);
11941             return;
11942         }
11943         break;
11944     }
11945     default:
11946         unallocated_encoding(s);
11947         return;
11948     }
11949
11950     if (!fp_access_check(s)) {
11951         return;
11952     }
11953
11954     if (need_fpstatus || need_rmode) {
11955         tcg_fpstatus = get_fpstatus_ptr(false);
11956     } else {
11957         tcg_fpstatus = NULL;
11958     }
11959     if (need_rmode) {
11960         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
11961         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11962     } else {
11963         tcg_rmode = NULL;
11964     }
11965
11966     switch (opcode) {
11967     case 0x5:
11968         if (u && size == 0) { /* NOT */
11969             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
11970             return;
11971         }
11972         break;
11973     case 0xb:
11974         if (u) { /* NEG */
11975             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
11976             return;
11977         }
11978         break;
11979     }
11980
11981     if (size == 3) {
11982         /* All 64-bit element operations can be shared with scalar 2misc */
11983         int pass;
11984
11985         /* Coverity claims (size == 3 && !is_q) has been eliminated
11986          * from all paths leading to here.
11987          */
11988         tcg_debug_assert(is_q);
11989         for (pass = 0; pass < 2; pass++) {
11990             TCGv_i64 tcg_op = tcg_temp_new_i64();
11991             TCGv_i64 tcg_res = tcg_temp_new_i64();
11992
11993             read_vec_element(s, tcg_op, rn, pass, MO_64);
11994
11995             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
11996                             tcg_rmode, tcg_fpstatus);
11997
11998             write_vec_element(s, tcg_res, rd, pass, MO_64);
11999
12000             tcg_temp_free_i64(tcg_res);
12001             tcg_temp_free_i64(tcg_op);
12002         }
12003     } else {
12004         int pass;
12005
12006         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
12007             TCGv_i32 tcg_op = tcg_temp_new_i32();
12008             TCGv_i32 tcg_res = tcg_temp_new_i32();
12009             TCGCond cond;
12010
12011             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
12012
12013             if (size == 2) {
12014                 /* Special cases for 32 bit elements */
12015                 switch (opcode) {
12016                 case 0xa: /* CMLT */
12017                     /* 32 bit integer comparison against zero, result is
12018                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
12019                      * and inverting.
12020                      */
12021                     cond = TCG_COND_LT;
12022                 do_cmop:
12023                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
12024                     tcg_gen_neg_i32(tcg_res, tcg_res);
12025                     break;
12026                 case 0x8: /* CMGT, CMGE */
12027                     cond = u ? TCG_COND_GE : TCG_COND_GT;
12028                     goto do_cmop;
12029                 case 0x9: /* CMEQ, CMLE */
12030                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
12031                     goto do_cmop;
12032                 case 0x4: /* CLS */
12033                     if (u) {
12034                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
12035                     } else {
12036                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
12037                     }
12038                     break;
12039                 case 0x7: /* SQABS, SQNEG */
12040                     if (u) {
12041                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
12042                     } else {
12043                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
12044                     }
12045                     break;
12046                 case 0xb: /* ABS, NEG */
12047                     if (u) {
12048                         tcg_gen_neg_i32(tcg_res, tcg_op);
12049                     } else {
12050                         TCGv_i32 tcg_zero = tcg_const_i32(0);
12051                         tcg_gen_neg_i32(tcg_res, tcg_op);
12052                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
12053                                             tcg_zero, tcg_op, tcg_res);
12054                         tcg_temp_free_i32(tcg_zero);
12055                     }
12056                     break;
12057                 case 0x2f: /* FABS */
12058                     gen_helper_vfp_abss(tcg_res, tcg_op);
12059                     break;
12060                 case 0x6f: /* FNEG */
12061                     gen_helper_vfp_negs(tcg_res, tcg_op);
12062                     break;
12063                 case 0x7f: /* FSQRT */
12064                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
12065                     break;
12066                 case 0x1a: /* FCVTNS */
12067                 case 0x1b: /* FCVTMS */
12068                 case 0x1c: /* FCVTAS */
12069                 case 0x3a: /* FCVTPS */
12070                 case 0x3b: /* FCVTZS */
12071                 {
12072                     TCGv_i32 tcg_shift = tcg_const_i32(0);
12073                     gen_helper_vfp_tosls(tcg_res, tcg_op,
12074                                          tcg_shift, tcg_fpstatus);
12075                     tcg_temp_free_i32(tcg_shift);
12076                     break;
12077                 }
12078                 case 0x5a: /* FCVTNU */
12079                 case 0x5b: /* FCVTMU */
12080                 case 0x5c: /* FCVTAU */
12081                 case 0x7a: /* FCVTPU */
12082                 case 0x7b: /* FCVTZU */
12083                 {
12084                     TCGv_i32 tcg_shift = tcg_const_i32(0);
12085                     gen_helper_vfp_touls(tcg_res, tcg_op,
12086                                          tcg_shift, tcg_fpstatus);
12087                     tcg_temp_free_i32(tcg_shift);
12088                     break;
12089                 }
12090                 case 0x18: /* FRINTN */
12091                 case 0x19: /* FRINTM */
12092                 case 0x38: /* FRINTP */
12093                 case 0x39: /* FRINTZ */
12094                 case 0x58: /* FRINTA */
12095                 case 0x79: /* FRINTI */
12096                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
12097                     break;
12098                 case 0x59: /* FRINTX */
12099                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
12100                     break;
12101                 case 0x7c: /* URSQRTE */
12102                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
12103                     break;
12104                 default:
12105                     g_assert_not_reached();
12106                 }
12107             } else {
12108                 /* Use helpers for 8 and 16 bit elements */
12109                 switch (opcode) {
12110                 case 0x5: /* CNT, RBIT */
12111                     /* For these two insns size is part of the opcode specifier
12112                      * (handled earlier); they always operate on byte elements.
12113                      */
12114                     if (u) {
12115                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
12116                     } else {
12117                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
12118                     }
12119                     break;
12120                 case 0x7: /* SQABS, SQNEG */
12121                 {
12122                     NeonGenOneOpEnvFn *genfn;
12123                     static NeonGenOneOpEnvFn * const fns[2][2] = {
12124                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
12125                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
12126                     };
12127                     genfn = fns[size][u];
12128                     genfn(tcg_res, cpu_env, tcg_op);
12129                     break;
12130                 }
12131                 case 0x8: /* CMGT, CMGE */
12132                 case 0x9: /* CMEQ, CMLE */
12133                 case 0xa: /* CMLT */
12134                 {
12135                     static NeonGenTwoOpFn * const fns[3][2] = {
12136                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
12137                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
12138                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
12139                     };
12140                     NeonGenTwoOpFn *genfn;
12141                     int comp;
12142                     bool reverse;
12143                     TCGv_i32 tcg_zero = tcg_const_i32(0);
12144
12145                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
12146                     comp = (opcode - 0x8) * 2 + u;
12147                     /* ...but LE, LT are implemented as reverse GE, GT */
12148                     reverse = (comp > 2);
12149                     if (reverse) {
12150                         comp = 4 - comp;
12151                     }
12152                     genfn = fns[comp][size];
12153                     if (reverse) {
12154                         genfn(tcg_res, tcg_zero, tcg_op);
12155                     } else {
12156                         genfn(tcg_res, tcg_op, tcg_zero);
12157                     }
12158                     tcg_temp_free_i32(tcg_zero);
12159                     break;
12160                 }
12161                 case 0xb: /* ABS, NEG */
12162                     if (u) {
12163                         TCGv_i32 tcg_zero = tcg_const_i32(0);
12164                         if (size) {
12165                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
12166                         } else {
12167                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
12168                         }
12169                         tcg_temp_free_i32(tcg_zero);
12170                     } else {
12171                         if (size) {
12172                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
12173                         } else {
12174                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
12175                         }
12176                     }
12177                     break;
12178                 case 0x4: /* CLS, CLZ */
12179                     if (u) {
12180                         if (size == 0) {
12181                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
12182                         } else {
12183                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
12184                         }
12185                     } else {
12186                         if (size == 0) {
12187                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
12188                         } else {
12189                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
12190                         }
12191                     }
12192                     break;
12193                 default:
12194                     g_assert_not_reached();
12195                 }
12196             }
12197
12198             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12199
12200             tcg_temp_free_i32(tcg_res);
12201             tcg_temp_free_i32(tcg_op);
12202         }
12203     }
12204     clear_vec_high(s, is_q, rd);
12205
12206     if (need_rmode) {
12207         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12208         tcg_temp_free_i32(tcg_rmode);
12209     }
12210     if (need_fpstatus) {
12211         tcg_temp_free_ptr(tcg_fpstatus);
12212     }
12213 }
12214
12215 /* AdvSIMD [scalar] two register miscellaneous (FP16)
12216  *
12217  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
12218  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12219  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12220  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
12221  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
12222  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
12223  *
12224  * This actually covers two groups where scalar access is governed by
12225  * bit 28. A bunch of the instructions (float to integral) only exist
12226  * in the vector form and are un-allocated for the scalar decode. Also
12227  * in the scalar decode Q is always 1.
12228  */
12229 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
12230 {
12231     int fpop, opcode, a, u;
12232     int rn, rd;
12233     bool is_q;
12234     bool is_scalar;
12235     bool only_in_vector = false;
12236
12237     int pass;
12238     TCGv_i32 tcg_rmode = NULL;
12239     TCGv_ptr tcg_fpstatus = NULL;
12240     bool need_rmode = false;
12241     bool need_fpst = true;
12242     int rmode;
12243
12244     if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
12245         unallocated_encoding(s);
12246         return;
12247     }
12248
12249     rd = extract32(insn, 0, 5);
12250     rn = extract32(insn, 5, 5);
12251
12252     a = extract32(insn, 23, 1);
12253     u = extract32(insn, 29, 1);
12254     is_scalar = extract32(insn, 28, 1);
12255     is_q = extract32(insn, 30, 1);
12256
12257     opcode = extract32(insn, 12, 5);
12258     fpop = deposit32(opcode, 5, 1, a);
12259     fpop = deposit32(fpop, 6, 1, u);
12260
12261     rd = extract32(insn, 0, 5);
12262     rn = extract32(insn, 5, 5);
12263
12264     switch (fpop) {
12265     case 0x1d: /* SCVTF */
12266     case 0x5d: /* UCVTF */
12267     {
12268         int elements;
12269
12270         if (is_scalar) {
12271             elements = 1;
12272         } else {
12273             elements = (is_q ? 8 : 4);
12274         }
12275
12276         if (!fp_access_check(s)) {
12277             return;
12278         }
12279         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
12280         return;
12281     }
12282     break;
12283     case 0x2c: /* FCMGT (zero) */
12284     case 0x2d: /* FCMEQ (zero) */
12285     case 0x2e: /* FCMLT (zero) */
12286     case 0x6c: /* FCMGE (zero) */
12287     case 0x6d: /* FCMLE (zero) */
12288         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
12289         return;
12290     case 0x3d: /* FRECPE */
12291     case 0x3f: /* FRECPX */
12292         break;
12293     case 0x18: /* FRINTN */
12294         need_rmode = true;
12295         only_in_vector = true;
12296         rmode = FPROUNDING_TIEEVEN;
12297         break;
12298     case 0x19: /* FRINTM */
12299         need_rmode = true;
12300         only_in_vector = true;
12301         rmode = FPROUNDING_NEGINF;
12302         break;
12303     case 0x38: /* FRINTP */
12304         need_rmode = true;
12305         only_in_vector = true;
12306         rmode = FPROUNDING_POSINF;
12307         break;
12308     case 0x39: /* FRINTZ */
12309         need_rmode = true;
12310         only_in_vector = true;
12311         rmode = FPROUNDING_ZERO;
12312         break;
12313     case 0x58: /* FRINTA */
12314         need_rmode = true;
12315         only_in_vector = true;
12316         rmode = FPROUNDING_TIEAWAY;
12317         break;
12318     case 0x59: /* FRINTX */
12319     case 0x79: /* FRINTI */
12320         only_in_vector = true;
12321         /* current rounding mode */
12322         break;
12323     case 0x1a: /* FCVTNS */
12324         need_rmode = true;
12325         rmode = FPROUNDING_TIEEVEN;
12326         break;
12327     case 0x1b: /* FCVTMS */
12328         need_rmode = true;
12329         rmode = FPROUNDING_NEGINF;
12330         break;
12331     case 0x1c: /* FCVTAS */
12332         need_rmode = true;
12333         rmode = FPROUNDING_TIEAWAY;
12334         break;
12335     case 0x3a: /* FCVTPS */
12336         need_rmode = true;
12337         rmode = FPROUNDING_POSINF;
12338         break;
12339     case 0x3b: /* FCVTZS */
12340         need_rmode = true;
12341         rmode = FPROUNDING_ZERO;
12342         break;
12343     case 0x5a: /* FCVTNU */
12344         need_rmode = true;
12345         rmode = FPROUNDING_TIEEVEN;
12346         break;
12347     case 0x5b: /* FCVTMU */
12348         need_rmode = true;
12349         rmode = FPROUNDING_NEGINF;
12350         break;
12351     case 0x5c: /* FCVTAU */
12352         need_rmode = true;
12353         rmode = FPROUNDING_TIEAWAY;
12354         break;
12355     case 0x7a: /* FCVTPU */
12356         need_rmode = true;
12357         rmode = FPROUNDING_POSINF;
12358         break;
12359     case 0x7b: /* FCVTZU */
12360         need_rmode = true;
12361         rmode = FPROUNDING_ZERO;
12362         break;
12363     case 0x2f: /* FABS */
12364     case 0x6f: /* FNEG */
12365         need_fpst = false;
12366         break;
12367     case 0x7d: /* FRSQRTE */
12368     case 0x7f: /* FSQRT (vector) */
12369         break;
12370     default:
12371         fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
12372         g_assert_not_reached();
12373     }
12374
12375
12376     /* Check additional constraints for the scalar encoding */
12377     if (is_scalar) {
12378         if (!is_q) {
12379             unallocated_encoding(s);
12380             return;
12381         }
12382         /* FRINTxx is only in the vector form */
12383         if (only_in_vector) {
12384             unallocated_encoding(s);
12385             return;
12386         }
12387     }
12388
12389     if (!fp_access_check(s)) {
12390         return;
12391     }
12392
12393     if (need_rmode || need_fpst) {
12394         tcg_fpstatus = get_fpstatus_ptr(true);
12395     }
12396
12397     if (need_rmode) {
12398         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
12399         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12400     }
12401
12402     if (is_scalar) {
12403         TCGv_i32 tcg_op = read_fp_hreg(s, rn);
12404         TCGv_i32 tcg_res = tcg_temp_new_i32();
12405
12406         switch (fpop) {
12407         case 0x1a: /* FCVTNS */
12408         case 0x1b: /* FCVTMS */
12409         case 0x1c: /* FCVTAS */
12410         case 0x3a: /* FCVTPS */
12411         case 0x3b: /* FCVTZS */
12412             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12413             break;
12414         case 0x3d: /* FRECPE */
12415             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12416             break;
12417         case 0x3f: /* FRECPX */
12418             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
12419             break;
12420         case 0x5a: /* FCVTNU */
12421         case 0x5b: /* FCVTMU */
12422         case 0x5c: /* FCVTAU */
12423         case 0x7a: /* FCVTPU */
12424         case 0x7b: /* FCVTZU */
12425             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12426             break;
12427         case 0x6f: /* FNEG */
12428             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12429             break;
12430         case 0x7d: /* FRSQRTE */
12431             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12432             break;
12433         default:
12434             g_assert_not_reached();
12435         }
12436
12437         /* limit any sign extension going on */
12438         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
12439         write_fp_sreg(s, rd, tcg_res);
12440
12441         tcg_temp_free_i32(tcg_res);
12442         tcg_temp_free_i32(tcg_op);
12443     } else {
12444         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
12445             TCGv_i32 tcg_op = tcg_temp_new_i32();
12446             TCGv_i32 tcg_res = tcg_temp_new_i32();
12447
12448             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
12449
12450             switch (fpop) {
12451             case 0x1a: /* FCVTNS */
12452             case 0x1b: /* FCVTMS */
12453             case 0x1c: /* FCVTAS */
12454             case 0x3a: /* FCVTPS */
12455             case 0x3b: /* FCVTZS */
12456                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
12457                 break;
12458             case 0x3d: /* FRECPE */
12459                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
12460                 break;
12461             case 0x5a: /* FCVTNU */
12462             case 0x5b: /* FCVTMU */
12463             case 0x5c: /* FCVTAU */
12464             case 0x7a: /* FCVTPU */
12465             case 0x7b: /* FCVTZU */
12466                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
12467                 break;
12468             case 0x18: /* FRINTN */
12469             case 0x19: /* FRINTM */
12470             case 0x38: /* FRINTP */
12471             case 0x39: /* FRINTZ */
12472             case 0x58: /* FRINTA */
12473             case 0x79: /* FRINTI */
12474                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
12475                 break;
12476             case 0x59: /* FRINTX */
12477                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
12478                 break;
12479             case 0x2f: /* FABS */
12480                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
12481                 break;
12482             case 0x6f: /* FNEG */
12483                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
12484                 break;
12485             case 0x7d: /* FRSQRTE */
12486                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
12487                 break;
12488             case 0x7f: /* FSQRT */
12489                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
12490                 break;
12491             default:
12492                 g_assert_not_reached();
12493             }
12494
12495             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
12496
12497             tcg_temp_free_i32(tcg_res);
12498             tcg_temp_free_i32(tcg_op);
12499         }
12500
12501         clear_vec_high(s, is_q, rd);
12502     }
12503
12504     if (tcg_rmode) {
12505         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
12506         tcg_temp_free_i32(tcg_rmode);
12507     }
12508
12509     if (tcg_fpstatus) {
12510         tcg_temp_free_ptr(tcg_fpstatus);
12511     }
12512 }
12513
12514 /* AdvSIMD scalar x indexed element
12515  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12516  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12517  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12518  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
12519  * AdvSIMD vector x indexed element
12520  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
12521  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12522  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
12523  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
12524  */
12525 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
12526 {
12527     /* This encoding has two kinds of instruction:
12528      *  normal, where we perform elt x idxelt => elt for each
12529      *     element in the vector
12530      *  long, where we perform elt x idxelt and generate a result of
12531      *     double the width of the input element
12532      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
12533      */
12534     bool is_scalar = extract32(insn, 28, 1);
12535     bool is_q = extract32(insn, 30, 1);
12536     bool u = extract32(insn, 29, 1);
12537     int size = extract32(insn, 22, 2);
12538     int l = extract32(insn, 21, 1);
12539     int m = extract32(insn, 20, 1);
12540     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
12541     int rm = extract32(insn, 16, 4);
12542     int opcode = extract32(insn, 12, 4);
12543     int h = extract32(insn, 11, 1);
12544     int rn = extract32(insn, 5, 5);
12545     int rd = extract32(insn, 0, 5);
12546     bool is_long = false;
12547     int is_fp = 0;
12548     bool is_fp16 = false;
12549     int index;
12550     TCGv_ptr fpst;
12551
12552     switch (16 * u + opcode) {
12553     case 0x08: /* MUL */
12554     case 0x10: /* MLA */
12555     case 0x14: /* MLS */
12556         if (is_scalar) {
12557             unallocated_encoding(s);
12558             return;
12559         }
12560         break;
12561     case 0x02: /* SMLAL, SMLAL2 */
12562     case 0x12: /* UMLAL, UMLAL2 */
12563     case 0x06: /* SMLSL, SMLSL2 */
12564     case 0x16: /* UMLSL, UMLSL2 */
12565     case 0x0a: /* SMULL, SMULL2 */
12566     case 0x1a: /* UMULL, UMULL2 */
12567         if (is_scalar) {
12568             unallocated_encoding(s);
12569             return;
12570         }
12571         is_long = true;
12572         break;
12573     case 0x03: /* SQDMLAL, SQDMLAL2 */
12574     case 0x07: /* SQDMLSL, SQDMLSL2 */
12575     case 0x0b: /* SQDMULL, SQDMULL2 */
12576         is_long = true;
12577         break;
12578     case 0x0c: /* SQDMULH */
12579     case 0x0d: /* SQRDMULH */
12580         break;
12581     case 0x01: /* FMLA */
12582     case 0x05: /* FMLS */
12583     case 0x09: /* FMUL */
12584     case 0x19: /* FMULX */
12585         is_fp = 1;
12586         break;
12587     case 0x1d: /* SQRDMLAH */
12588     case 0x1f: /* SQRDMLSH */
12589         if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
12590             unallocated_encoding(s);
12591             return;
12592         }
12593         break;
12594     case 0x0e: /* SDOT */
12595     case 0x1e: /* UDOT */
12596         if (size != MO_32 || !arm_dc_feature(s, ARM_FEATURE_V8_DOTPROD)) {
12597             unallocated_encoding(s);
12598             return;
12599         }
12600         break;
12601     case 0x11: /* FCMLA #0 */
12602     case 0x13: /* FCMLA #90 */
12603     case 0x15: /* FCMLA #180 */
12604     case 0x17: /* FCMLA #270 */
12605         if (!arm_dc_feature(s, ARM_FEATURE_V8_FCMA)) {
12606             unallocated_encoding(s);
12607             return;
12608         }
12609         is_fp = 2;
12610         break;
12611     default:
12612         unallocated_encoding(s);
12613         return;
12614     }
12615
12616     switch (is_fp) {
12617     case 1: /* normal fp */
12618         /* convert insn encoded size to TCGMemOp size */
12619         switch (size) {
12620         case 0: /* half-precision */
12621             size = MO_16;
12622             is_fp16 = true;
12623             break;
12624         case MO_32: /* single precision */
12625         case MO_64: /* double precision */
12626             break;
12627         default:
12628             unallocated_encoding(s);
12629             return;
12630         }
12631         break;
12632
12633     case 2: /* complex fp */
12634         /* Each indexable element is a complex pair.  */
12635         size <<= 1;
12636         switch (size) {
12637         case MO_32:
12638             if (h && !is_q) {
12639                 unallocated_encoding(s);
12640                 return;
12641             }
12642             is_fp16 = true;
12643             break;
12644         case MO_64:
12645             break;
12646         default:
12647             unallocated_encoding(s);
12648             return;
12649         }
12650         break;
12651
12652     default: /* integer */
12653         switch (size) {
12654         case MO_8:
12655         case MO_64:
12656             unallocated_encoding(s);
12657             return;
12658         }
12659         break;
12660     }
12661     if (is_fp16 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
12662         unallocated_encoding(s);
12663         return;
12664     }
12665
12666     /* Given TCGMemOp size, adjust register and indexing.  */
12667     switch (size) {
12668     case MO_16:
12669         index = h << 2 | l << 1 | m;
12670         break;
12671     case MO_32:
12672         index = h << 1 | l;
12673         rm |= m << 4;
12674         break;
12675     case MO_64:
12676         if (l || !is_q) {
12677             unallocated_encoding(s);
12678             return;
12679         }
12680         index = h;
12681         rm |= m << 4;
12682         break;
12683     default:
12684         g_assert_not_reached();
12685     }
12686
12687     if (!fp_access_check(s)) {
12688         return;
12689     }
12690
12691     if (is_fp) {
12692         fpst = get_fpstatus_ptr(is_fp16);
12693     } else {
12694         fpst = NULL;
12695     }
12696
12697     switch (16 * u + opcode) {
12698     case 0x0e: /* SDOT */
12699     case 0x1e: /* UDOT */
12700         gen_gvec_op3_ool(s, is_q, rd, rn, rm, index,
12701                          u ? gen_helper_gvec_udot_idx_b
12702                          : gen_helper_gvec_sdot_idx_b);
12703         return;
12704     case 0x11: /* FCMLA #0 */
12705     case 0x13: /* FCMLA #90 */
12706     case 0x15: /* FCMLA #180 */
12707     case 0x17: /* FCMLA #270 */
12708         {
12709             int rot = extract32(insn, 13, 2);
12710             int data = (index << 2) | rot;
12711             tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
12712                                vec_full_reg_offset(s, rn),
12713                                vec_full_reg_offset(s, rm), fpst,
12714                                is_q ? 16 : 8, vec_full_reg_size(s), data,
12715                                size == MO_64
12716                                ? gen_helper_gvec_fcmlas_idx
12717                                : gen_helper_gvec_fcmlah_idx);
12718             tcg_temp_free_ptr(fpst);
12719         }
12720         return;
12721     }
12722
12723     if (size == 3) {
12724         TCGv_i64 tcg_idx = tcg_temp_new_i64();
12725         int pass;
12726
12727         assert(is_fp && is_q && !is_long);
12728
12729         read_vec_element(s, tcg_idx, rm, index, MO_64);
12730
12731         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12732             TCGv_i64 tcg_op = tcg_temp_new_i64();
12733             TCGv_i64 tcg_res = tcg_temp_new_i64();
12734
12735             read_vec_element(s, tcg_op, rn, pass, MO_64);
12736
12737             switch (16 * u + opcode) {
12738             case 0x05: /* FMLS */
12739                 /* As usual for ARM, separate negation for fused multiply-add */
12740                 gen_helper_vfp_negd(tcg_op, tcg_op);
12741                 /* fall through */
12742             case 0x01: /* FMLA */
12743                 read_vec_element(s, tcg_res, rd, pass, MO_64);
12744                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12745                 break;
12746             case 0x09: /* FMUL */
12747                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12748                 break;
12749             case 0x19: /* FMULX */
12750                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
12751                 break;
12752             default:
12753                 g_assert_not_reached();
12754             }
12755
12756             write_vec_element(s, tcg_res, rd, pass, MO_64);
12757             tcg_temp_free_i64(tcg_op);
12758             tcg_temp_free_i64(tcg_res);
12759         }
12760
12761         tcg_temp_free_i64(tcg_idx);
12762         clear_vec_high(s, !is_scalar, rd);
12763     } else if (!is_long) {
12764         /* 32 bit floating point, or 16 or 32 bit integer.
12765          * For the 16 bit scalar case we use the usual Neon helpers and
12766          * rely on the fact that 0 op 0 == 0 with no side effects.
12767          */
12768         TCGv_i32 tcg_idx = tcg_temp_new_i32();
12769         int pass, maxpasses;
12770
12771         if (is_scalar) {
12772             maxpasses = 1;
12773         } else {
12774             maxpasses = is_q ? 4 : 2;
12775         }
12776
12777         read_vec_element_i32(s, tcg_idx, rm, index, size);
12778
12779         if (size == 1 && !is_scalar) {
12780             /* The simplest way to handle the 16x16 indexed ops is to duplicate
12781              * the index into both halves of the 32 bit tcg_idx and then use
12782              * the usual Neon helpers.
12783              */
12784             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
12785         }
12786
12787         for (pass = 0; pass < maxpasses; pass++) {
12788             TCGv_i32 tcg_op = tcg_temp_new_i32();
12789             TCGv_i32 tcg_res = tcg_temp_new_i32();
12790
12791             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
12792
12793             switch (16 * u + opcode) {
12794             case 0x08: /* MUL */
12795             case 0x10: /* MLA */
12796             case 0x14: /* MLS */
12797             {
12798                 static NeonGenTwoOpFn * const fns[2][2] = {
12799                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
12800                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
12801                 };
12802                 NeonGenTwoOpFn *genfn;
12803                 bool is_sub = opcode == 0x4;
12804
12805                 if (size == 1) {
12806                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
12807                 } else {
12808                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
12809                 }
12810                 if (opcode == 0x8) {
12811                     break;
12812                 }
12813                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
12814                 genfn = fns[size - 1][is_sub];
12815                 genfn(tcg_res, tcg_op, tcg_res);
12816                 break;
12817             }
12818             case 0x05: /* FMLS */
12819             case 0x01: /* FMLA */
12820                 read_vec_element_i32(s, tcg_res, rd, pass,
12821                                      is_scalar ? size : MO_32);
12822                 switch (size) {
12823                 case 1:
12824                     if (opcode == 0x5) {
12825                         /* As usual for ARM, separate negation for fused
12826                          * multiply-add */
12827                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
12828                     }
12829                     if (is_scalar) {
12830                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
12831                                                    tcg_res, fpst);
12832                     } else {
12833                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
12834                                                     tcg_res, fpst);
12835                     }
12836                     break;
12837                 case 2:
12838                     if (opcode == 0x5) {
12839                         /* As usual for ARM, separate negation for
12840                          * fused multiply-add */
12841                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
12842                     }
12843                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
12844                                            tcg_res, fpst);
12845                     break;
12846                 default:
12847                     g_assert_not_reached();
12848                 }
12849                 break;
12850             case 0x09: /* FMUL */
12851                 switch (size) {
12852                 case 1:
12853                     if (is_scalar) {
12854                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
12855                                                 tcg_idx, fpst);
12856                     } else {
12857                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
12858                                                  tcg_idx, fpst);
12859                     }
12860                     break;
12861                 case 2:
12862                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
12863                     break;
12864                 default:
12865                     g_assert_not_reached();
12866                 }
12867                 break;
12868             case 0x19: /* FMULX */
12869                 switch (size) {
12870                 case 1:
12871                     if (is_scalar) {
12872                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
12873                                                  tcg_idx, fpst);
12874                     } else {
12875                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
12876                                                   tcg_idx, fpst);
12877                     }
12878                     break;
12879                 case 2:
12880                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
12881                     break;
12882                 default:
12883                     g_assert_not_reached();
12884                 }
12885                 break;
12886             case 0x0c: /* SQDMULH */
12887                 if (size == 1) {
12888                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
12889                                                tcg_op, tcg_idx);
12890                 } else {
12891                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
12892                                                tcg_op, tcg_idx);
12893                 }
12894                 break;
12895             case 0x0d: /* SQRDMULH */
12896                 if (size == 1) {
12897                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
12898                                                 tcg_op, tcg_idx);
12899                 } else {
12900                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
12901                                                 tcg_op, tcg_idx);
12902                 }
12903                 break;
12904             case 0x1d: /* SQRDMLAH */
12905                 read_vec_element_i32(s, tcg_res, rd, pass,
12906                                      is_scalar ? size : MO_32);
12907                 if (size == 1) {
12908                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
12909                                                 tcg_op, tcg_idx, tcg_res);
12910                 } else {
12911                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
12912                                                 tcg_op, tcg_idx, tcg_res);
12913                 }
12914                 break;
12915             case 0x1f: /* SQRDMLSH */
12916                 read_vec_element_i32(s, tcg_res, rd, pass,
12917                                      is_scalar ? size : MO_32);
12918                 if (size == 1) {
12919                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
12920                                                 tcg_op, tcg_idx, tcg_res);
12921                 } else {
12922                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
12923                                                 tcg_op, tcg_idx, tcg_res);
12924                 }
12925                 break;
12926             default:
12927                 g_assert_not_reached();
12928             }
12929
12930             if (is_scalar) {
12931                 write_fp_sreg(s, rd, tcg_res);
12932             } else {
12933                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12934             }
12935
12936             tcg_temp_free_i32(tcg_op);
12937             tcg_temp_free_i32(tcg_res);
12938         }
12939
12940         tcg_temp_free_i32(tcg_idx);
12941         clear_vec_high(s, is_q, rd);
12942     } else {
12943         /* long ops: 16x16->32 or 32x32->64 */
12944         TCGv_i64 tcg_res[2];
12945         int pass;
12946         bool satop = extract32(opcode, 0, 1);
12947         TCGMemOp memop = MO_32;
12948
12949         if (satop || !u) {
12950             memop |= MO_SIGN;
12951         }
12952
12953         if (size == 2) {
12954             TCGv_i64 tcg_idx = tcg_temp_new_i64();
12955
12956             read_vec_element(s, tcg_idx, rm, index, memop);
12957
12958             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12959                 TCGv_i64 tcg_op = tcg_temp_new_i64();
12960                 TCGv_i64 tcg_passres;
12961                 int passelt;
12962
12963                 if (is_scalar) {
12964                     passelt = 0;
12965                 } else {
12966                     passelt = pass + (is_q * 2);
12967                 }
12968
12969                 read_vec_element(s, tcg_op, rn, passelt, memop);
12970
12971                 tcg_res[pass] = tcg_temp_new_i64();
12972
12973                 if (opcode == 0xa || opcode == 0xb) {
12974                     /* Non-accumulating ops */
12975                     tcg_passres = tcg_res[pass];
12976                 } else {
12977                     tcg_passres = tcg_temp_new_i64();
12978                 }
12979
12980                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
12981                 tcg_temp_free_i64(tcg_op);
12982
12983                 if (satop) {
12984                     /* saturating, doubling */
12985                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
12986                                                       tcg_passres, tcg_passres);
12987                 }
12988
12989                 if (opcode == 0xa || opcode == 0xb) {
12990                     continue;
12991                 }
12992
12993                 /* Accumulating op: handle accumulate step */
12994                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12995
12996                 switch (opcode) {
12997                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
12998                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
12999                     break;
13000                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13001                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
13002                     break;
13003                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13004                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
13005                     /* fall through */
13006                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13007                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
13008                                                       tcg_res[pass],
13009                                                       tcg_passres);
13010                     break;
13011                 default:
13012                     g_assert_not_reached();
13013                 }
13014                 tcg_temp_free_i64(tcg_passres);
13015             }
13016             tcg_temp_free_i64(tcg_idx);
13017
13018             clear_vec_high(s, !is_scalar, rd);
13019         } else {
13020             TCGv_i32 tcg_idx = tcg_temp_new_i32();
13021
13022             assert(size == 1);
13023             read_vec_element_i32(s, tcg_idx, rm, index, size);
13024
13025             if (!is_scalar) {
13026                 /* The simplest way to handle the 16x16 indexed ops is to
13027                  * duplicate the index into both halves of the 32 bit tcg_idx
13028                  * and then use the usual Neon helpers.
13029                  */
13030                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
13031             }
13032
13033             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
13034                 TCGv_i32 tcg_op = tcg_temp_new_i32();
13035                 TCGv_i64 tcg_passres;
13036
13037                 if (is_scalar) {
13038                     read_vec_element_i32(s, tcg_op, rn, pass, size);
13039                 } else {
13040                     read_vec_element_i32(s, tcg_op, rn,
13041                                          pass + (is_q * 2), MO_32);
13042                 }
13043
13044                 tcg_res[pass] = tcg_temp_new_i64();
13045
13046                 if (opcode == 0xa || opcode == 0xb) {
13047                     /* Non-accumulating ops */
13048                     tcg_passres = tcg_res[pass];
13049                 } else {
13050                     tcg_passres = tcg_temp_new_i64();
13051                 }
13052
13053                 if (memop & MO_SIGN) {
13054                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
13055                 } else {
13056                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
13057                 }
13058                 if (satop) {
13059                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
13060                                                       tcg_passres, tcg_passres);
13061                 }
13062                 tcg_temp_free_i32(tcg_op);
13063
13064                 if (opcode == 0xa || opcode == 0xb) {
13065                     continue;
13066                 }
13067
13068                 /* Accumulating op: handle accumulate step */
13069                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13070
13071                 switch (opcode) {
13072                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
13073                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
13074                                              tcg_passres);
13075                     break;
13076                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
13077                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
13078                                              tcg_passres);
13079                     break;
13080                 case 0x7: /* SQDMLSL, SQDMLSL2 */
13081                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
13082                     /* fall through */
13083                 case 0x3: /* SQDMLAL, SQDMLAL2 */
13084                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
13085                                                       tcg_res[pass],
13086                                                       tcg_passres);
13087                     break;
13088                 default:
13089                     g_assert_not_reached();
13090                 }
13091                 tcg_temp_free_i64(tcg_passres);
13092             }
13093             tcg_temp_free_i32(tcg_idx);
13094
13095             if (is_scalar) {
13096                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
13097             }
13098         }
13099
13100         if (is_scalar) {
13101             tcg_res[1] = tcg_const_i64(0);
13102         }
13103
13104         for (pass = 0; pass < 2; pass++) {
13105             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
13106             tcg_temp_free_i64(tcg_res[pass]);
13107         }
13108     }
13109
13110     if (fpst) {
13111         tcg_temp_free_ptr(fpst);
13112     }
13113 }
13114
13115 /* Crypto AES
13116  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13117  * +-----------------+------+-----------+--------+-----+------+------+
13118  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13119  * +-----------------+------+-----------+--------+-----+------+------+
13120  */
13121 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
13122 {
13123     int size = extract32(insn, 22, 2);
13124     int opcode = extract32(insn, 12, 5);
13125     int rn = extract32(insn, 5, 5);
13126     int rd = extract32(insn, 0, 5);
13127     int decrypt;
13128     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13129     TCGv_i32 tcg_decrypt;
13130     CryptoThreeOpIntFn *genfn;
13131
13132     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
13133         || size != 0) {
13134         unallocated_encoding(s);
13135         return;
13136     }
13137
13138     switch (opcode) {
13139     case 0x4: /* AESE */
13140         decrypt = 0;
13141         genfn = gen_helper_crypto_aese;
13142         break;
13143     case 0x6: /* AESMC */
13144         decrypt = 0;
13145         genfn = gen_helper_crypto_aesmc;
13146         break;
13147     case 0x5: /* AESD */
13148         decrypt = 1;
13149         genfn = gen_helper_crypto_aese;
13150         break;
13151     case 0x7: /* AESIMC */
13152         decrypt = 1;
13153         genfn = gen_helper_crypto_aesmc;
13154         break;
13155     default:
13156         unallocated_encoding(s);
13157         return;
13158     }
13159
13160     if (!fp_access_check(s)) {
13161         return;
13162     }
13163
13164     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13165     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13166     tcg_decrypt = tcg_const_i32(decrypt);
13167
13168     genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
13169
13170     tcg_temp_free_ptr(tcg_rd_ptr);
13171     tcg_temp_free_ptr(tcg_rn_ptr);
13172     tcg_temp_free_i32(tcg_decrypt);
13173 }
13174
13175 /* Crypto three-reg SHA
13176  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
13177  * +-----------------+------+---+------+---+--------+-----+------+------+
13178  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
13179  * +-----------------+------+---+------+---+--------+-----+------+------+
13180  */
13181 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
13182 {
13183     int size = extract32(insn, 22, 2);
13184     int opcode = extract32(insn, 12, 3);
13185     int rm = extract32(insn, 16, 5);
13186     int rn = extract32(insn, 5, 5);
13187     int rd = extract32(insn, 0, 5);
13188     CryptoThreeOpFn *genfn;
13189     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13190     int feature = ARM_FEATURE_V8_SHA256;
13191
13192     if (size != 0) {
13193         unallocated_encoding(s);
13194         return;
13195     }
13196
13197     switch (opcode) {
13198     case 0: /* SHA1C */
13199     case 1: /* SHA1P */
13200     case 2: /* SHA1M */
13201     case 3: /* SHA1SU0 */
13202         genfn = NULL;
13203         feature = ARM_FEATURE_V8_SHA1;
13204         break;
13205     case 4: /* SHA256H */
13206         genfn = gen_helper_crypto_sha256h;
13207         break;
13208     case 5: /* SHA256H2 */
13209         genfn = gen_helper_crypto_sha256h2;
13210         break;
13211     case 6: /* SHA256SU1 */
13212         genfn = gen_helper_crypto_sha256su1;
13213         break;
13214     default:
13215         unallocated_encoding(s);
13216         return;
13217     }
13218
13219     if (!arm_dc_feature(s, feature)) {
13220         unallocated_encoding(s);
13221         return;
13222     }
13223
13224     if (!fp_access_check(s)) {
13225         return;
13226     }
13227
13228     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13229     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13230     tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13231
13232     if (genfn) {
13233         genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13234     } else {
13235         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
13236
13237         gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
13238                                     tcg_rm_ptr, tcg_opcode);
13239         tcg_temp_free_i32(tcg_opcode);
13240     }
13241
13242     tcg_temp_free_ptr(tcg_rd_ptr);
13243     tcg_temp_free_ptr(tcg_rn_ptr);
13244     tcg_temp_free_ptr(tcg_rm_ptr);
13245 }
13246
13247 /* Crypto two-reg SHA
13248  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
13249  * +-----------------+------+-----------+--------+-----+------+------+
13250  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
13251  * +-----------------+------+-----------+--------+-----+------+------+
13252  */
13253 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
13254 {
13255     int size = extract32(insn, 22, 2);
13256     int opcode = extract32(insn, 12, 5);
13257     int rn = extract32(insn, 5, 5);
13258     int rd = extract32(insn, 0, 5);
13259     CryptoTwoOpFn *genfn;
13260     int feature;
13261     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13262
13263     if (size != 0) {
13264         unallocated_encoding(s);
13265         return;
13266     }
13267
13268     switch (opcode) {
13269     case 0: /* SHA1H */
13270         feature = ARM_FEATURE_V8_SHA1;
13271         genfn = gen_helper_crypto_sha1h;
13272         break;
13273     case 1: /* SHA1SU1 */
13274         feature = ARM_FEATURE_V8_SHA1;
13275         genfn = gen_helper_crypto_sha1su1;
13276         break;
13277     case 2: /* SHA256SU0 */
13278         feature = ARM_FEATURE_V8_SHA256;
13279         genfn = gen_helper_crypto_sha256su0;
13280         break;
13281     default:
13282         unallocated_encoding(s);
13283         return;
13284     }
13285
13286     if (!arm_dc_feature(s, feature)) {
13287         unallocated_encoding(s);
13288         return;
13289     }
13290
13291     if (!fp_access_check(s)) {
13292         return;
13293     }
13294
13295     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13296     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13297
13298     genfn(tcg_rd_ptr, tcg_rn_ptr);
13299
13300     tcg_temp_free_ptr(tcg_rd_ptr);
13301     tcg_temp_free_ptr(tcg_rn_ptr);
13302 }
13303
13304 /* Crypto three-reg SHA512
13305  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
13306  * +-----------------------+------+---+---+-----+--------+------+------+
13307  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
13308  * +-----------------------+------+---+---+-----+--------+------+------+
13309  */
13310 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
13311 {
13312     int opcode = extract32(insn, 10, 2);
13313     int o =  extract32(insn, 14, 1);
13314     int rm = extract32(insn, 16, 5);
13315     int rn = extract32(insn, 5, 5);
13316     int rd = extract32(insn, 0, 5);
13317     int feature;
13318     CryptoThreeOpFn *genfn;
13319
13320     if (o == 0) {
13321         switch (opcode) {
13322         case 0: /* SHA512H */
13323             feature = ARM_FEATURE_V8_SHA512;
13324             genfn = gen_helper_crypto_sha512h;
13325             break;
13326         case 1: /* SHA512H2 */
13327             feature = ARM_FEATURE_V8_SHA512;
13328             genfn = gen_helper_crypto_sha512h2;
13329             break;
13330         case 2: /* SHA512SU1 */
13331             feature = ARM_FEATURE_V8_SHA512;
13332             genfn = gen_helper_crypto_sha512su1;
13333             break;
13334         case 3: /* RAX1 */
13335             feature = ARM_FEATURE_V8_SHA3;
13336             genfn = NULL;
13337             break;
13338         }
13339     } else {
13340         switch (opcode) {
13341         case 0: /* SM3PARTW1 */
13342             feature = ARM_FEATURE_V8_SM3;
13343             genfn = gen_helper_crypto_sm3partw1;
13344             break;
13345         case 1: /* SM3PARTW2 */
13346             feature = ARM_FEATURE_V8_SM3;
13347             genfn = gen_helper_crypto_sm3partw2;
13348             break;
13349         case 2: /* SM4EKEY */
13350             feature = ARM_FEATURE_V8_SM4;
13351             genfn = gen_helper_crypto_sm4ekey;
13352             break;
13353         default:
13354             unallocated_encoding(s);
13355             return;
13356         }
13357     }
13358
13359     if (!arm_dc_feature(s, feature)) {
13360         unallocated_encoding(s);
13361         return;
13362     }
13363
13364     if (!fp_access_check(s)) {
13365         return;
13366     }
13367
13368     if (genfn) {
13369         TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13370
13371         tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13372         tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13373         tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13374
13375         genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
13376
13377         tcg_temp_free_ptr(tcg_rd_ptr);
13378         tcg_temp_free_ptr(tcg_rn_ptr);
13379         tcg_temp_free_ptr(tcg_rm_ptr);
13380     } else {
13381         TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13382         int pass;
13383
13384         tcg_op1 = tcg_temp_new_i64();
13385         tcg_op2 = tcg_temp_new_i64();
13386         tcg_res[0] = tcg_temp_new_i64();
13387         tcg_res[1] = tcg_temp_new_i64();
13388
13389         for (pass = 0; pass < 2; pass++) {
13390             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13391             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13392
13393             tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
13394             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13395         }
13396         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13397         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13398
13399         tcg_temp_free_i64(tcg_op1);
13400         tcg_temp_free_i64(tcg_op2);
13401         tcg_temp_free_i64(tcg_res[0]);
13402         tcg_temp_free_i64(tcg_res[1]);
13403     }
13404 }
13405
13406 /* Crypto two-reg SHA512
13407  *  31                                     12  11  10  9    5 4    0
13408  * +-----------------------------------------+--------+------+------+
13409  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
13410  * +-----------------------------------------+--------+------+------+
13411  */
13412 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
13413 {
13414     int opcode = extract32(insn, 10, 2);
13415     int rn = extract32(insn, 5, 5);
13416     int rd = extract32(insn, 0, 5);
13417     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
13418     int feature;
13419     CryptoTwoOpFn *genfn;
13420
13421     switch (opcode) {
13422     case 0: /* SHA512SU0 */
13423         feature = ARM_FEATURE_V8_SHA512;
13424         genfn = gen_helper_crypto_sha512su0;
13425         break;
13426     case 1: /* SM4E */
13427         feature = ARM_FEATURE_V8_SM4;
13428         genfn = gen_helper_crypto_sm4e;
13429         break;
13430     default:
13431         unallocated_encoding(s);
13432         return;
13433     }
13434
13435     if (!arm_dc_feature(s, feature)) {
13436         unallocated_encoding(s);
13437         return;
13438     }
13439
13440     if (!fp_access_check(s)) {
13441         return;
13442     }
13443
13444     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13445     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13446
13447     genfn(tcg_rd_ptr, tcg_rn_ptr);
13448
13449     tcg_temp_free_ptr(tcg_rd_ptr);
13450     tcg_temp_free_ptr(tcg_rn_ptr);
13451 }
13452
13453 /* Crypto four-register
13454  *  31               23 22 21 20  16 15  14  10 9    5 4    0
13455  * +-------------------+-----+------+---+------+------+------+
13456  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
13457  * +-------------------+-----+------+---+------+------+------+
13458  */
13459 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
13460 {
13461     int op0 = extract32(insn, 21, 2);
13462     int rm = extract32(insn, 16, 5);
13463     int ra = extract32(insn, 10, 5);
13464     int rn = extract32(insn, 5, 5);
13465     int rd = extract32(insn, 0, 5);
13466     int feature;
13467
13468     switch (op0) {
13469     case 0: /* EOR3 */
13470     case 1: /* BCAX */
13471         feature = ARM_FEATURE_V8_SHA3;
13472         break;
13473     case 2: /* SM3SS1 */
13474         feature = ARM_FEATURE_V8_SM3;
13475         break;
13476     default:
13477         unallocated_encoding(s);
13478         return;
13479     }
13480
13481     if (!arm_dc_feature(s, feature)) {
13482         unallocated_encoding(s);
13483         return;
13484     }
13485
13486     if (!fp_access_check(s)) {
13487         return;
13488     }
13489
13490     if (op0 < 2) {
13491         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
13492         int pass;
13493
13494         tcg_op1 = tcg_temp_new_i64();
13495         tcg_op2 = tcg_temp_new_i64();
13496         tcg_op3 = tcg_temp_new_i64();
13497         tcg_res[0] = tcg_temp_new_i64();
13498         tcg_res[1] = tcg_temp_new_i64();
13499
13500         for (pass = 0; pass < 2; pass++) {
13501             read_vec_element(s, tcg_op1, rn, pass, MO_64);
13502             read_vec_element(s, tcg_op2, rm, pass, MO_64);
13503             read_vec_element(s, tcg_op3, ra, pass, MO_64);
13504
13505             if (op0 == 0) {
13506                 /* EOR3 */
13507                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
13508             } else {
13509                 /* BCAX */
13510                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
13511             }
13512             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
13513         }
13514         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13515         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13516
13517         tcg_temp_free_i64(tcg_op1);
13518         tcg_temp_free_i64(tcg_op2);
13519         tcg_temp_free_i64(tcg_op3);
13520         tcg_temp_free_i64(tcg_res[0]);
13521         tcg_temp_free_i64(tcg_res[1]);
13522     } else {
13523         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
13524
13525         tcg_op1 = tcg_temp_new_i32();
13526         tcg_op2 = tcg_temp_new_i32();
13527         tcg_op3 = tcg_temp_new_i32();
13528         tcg_res = tcg_temp_new_i32();
13529         tcg_zero = tcg_const_i32(0);
13530
13531         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
13532         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
13533         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
13534
13535         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
13536         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
13537         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
13538         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
13539
13540         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
13541         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
13542         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
13543         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
13544
13545         tcg_temp_free_i32(tcg_op1);
13546         tcg_temp_free_i32(tcg_op2);
13547         tcg_temp_free_i32(tcg_op3);
13548         tcg_temp_free_i32(tcg_res);
13549         tcg_temp_free_i32(tcg_zero);
13550     }
13551 }
13552
13553 /* Crypto XAR
13554  *  31                   21 20  16 15    10 9    5 4    0
13555  * +-----------------------+------+--------+------+------+
13556  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
13557  * +-----------------------+------+--------+------+------+
13558  */
13559 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
13560 {
13561     int rm = extract32(insn, 16, 5);
13562     int imm6 = extract32(insn, 10, 6);
13563     int rn = extract32(insn, 5, 5);
13564     int rd = extract32(insn, 0, 5);
13565     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
13566     int pass;
13567
13568     if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) {
13569         unallocated_encoding(s);
13570         return;
13571     }
13572
13573     if (!fp_access_check(s)) {
13574         return;
13575     }
13576
13577     tcg_op1 = tcg_temp_new_i64();
13578     tcg_op2 = tcg_temp_new_i64();
13579     tcg_res[0] = tcg_temp_new_i64();
13580     tcg_res[1] = tcg_temp_new_i64();
13581
13582     for (pass = 0; pass < 2; pass++) {
13583         read_vec_element(s, tcg_op1, rn, pass, MO_64);
13584         read_vec_element(s, tcg_op2, rm, pass, MO_64);
13585
13586         tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
13587         tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
13588     }
13589     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
13590     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
13591
13592     tcg_temp_free_i64(tcg_op1);
13593     tcg_temp_free_i64(tcg_op2);
13594     tcg_temp_free_i64(tcg_res[0]);
13595     tcg_temp_free_i64(tcg_res[1]);
13596 }
13597
13598 /* Crypto three-reg imm2
13599  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
13600  * +-----------------------+------+-----+------+--------+------+------+
13601  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
13602  * +-----------------------+------+-----+------+--------+------+------+
13603  */
13604 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
13605 {
13606     int opcode = extract32(insn, 10, 2);
13607     int imm2 = extract32(insn, 12, 2);
13608     int rm = extract32(insn, 16, 5);
13609     int rn = extract32(insn, 5, 5);
13610     int rd = extract32(insn, 0, 5);
13611     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
13612     TCGv_i32 tcg_imm2, tcg_opcode;
13613
13614     if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) {
13615         unallocated_encoding(s);
13616         return;
13617     }
13618
13619     if (!fp_access_check(s)) {
13620         return;
13621     }
13622
13623     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13624     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13625     tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13626     tcg_imm2   = tcg_const_i32(imm2);
13627     tcg_opcode = tcg_const_i32(opcode);
13628
13629     gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
13630                             tcg_opcode);
13631
13632     tcg_temp_free_ptr(tcg_rd_ptr);
13633     tcg_temp_free_ptr(tcg_rn_ptr);
13634     tcg_temp_free_ptr(tcg_rm_ptr);
13635     tcg_temp_free_i32(tcg_imm2);
13636     tcg_temp_free_i32(tcg_opcode);
13637 }
13638
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * Each entry is { pattern, mask, fn } and the table is searched by
 * lookup_disas_fn() in disas_data_proc_simd().  The all-zero entry with
 * a NULL fn terminates the table.
 * NOTE(review): entry order matters where decodes overlap (see the
 * simd_mod_imm comment below); presumably the first entry with
 * (insn & mask) == pattern wins -- confirm against lookup_disas_fn().
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    /* Vector forms */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    /* Scalar forms */
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    /* Crypto */
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    /* Half-precision (fp16) forms */
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    /* Terminator */
    { 0x00000000, 0x00000000, NULL }
};
13680
13681 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13682 {
13683     /* Note that this is called with all non-FP cases from
13684      * table C3-6 so it must UNDEF for entries not specifically
13685      * allocated to instructions in that table.
13686      */
13687     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13688     if (fn) {
13689         fn(s, insn);
13690     } else {
13691         unallocated_encoding(s);
13692     }
13693 }
13694
13695 /* C3.6 Data processing - SIMD and floating point */
13696 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13697 {
13698     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13699         disas_data_proc_fp(s, insn);
13700     } else {
13701         /* SIMD, including crypto */
13702         disas_data_proc_simd(s, insn);
13703     }
13704 }
13705
13706 /* C3.1 A64 instruction index by encoding */
13707 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
13708 {
13709     uint32_t insn;
13710
13711     insn = arm_ldl_code(env, s->pc, s->sctlr_b);
13712     s->insn = insn;
13713     s->pc += 4;
13714
13715     s->fp_access_checked = false;
13716
13717     switch (extract32(insn, 25, 4)) {
13718     case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
13719         unallocated_encoding(s);
13720         break;
13721     case 0x2:
13722         if (!arm_dc_feature(s, ARM_FEATURE_SVE) || !disas_sve(s, insn)) {
13723             unallocated_encoding(s);
13724         }
13725         break;
13726     case 0x8: case 0x9: /* Data processing - immediate */
13727         disas_data_proc_imm(s, insn);
13728         break;
13729     case 0xa: case 0xb: /* Branch, exception generation and system insns */
13730         disas_b_exc_sys(s, insn);
13731         break;
13732     case 0x4:
13733     case 0x6:
13734     case 0xc:
13735     case 0xe:      /* Loads and stores */
13736         disas_ldst(s, insn);
13737         break;
13738     case 0x5:
13739     case 0xd:      /* Data processing - register */
13740         disas_data_proc_reg(s, insn);
13741         break;
13742     case 0x7:
13743     case 0xf:      /* Data processing - SIMD and floating point */
13744         disas_data_proc_simd_fp(s, insn);
13745         break;
13746     default:
13747         assert(FALSE); /* all 15 cases should be handled above */
13748         break;
13749     }
13750
13751     /* if we allocated any temporaries, free them here */
13752     free_tmp_a64(s);
13753 }
13754
13755 static void aarch64_tr_init_disas_context(DisasContextBase *dcbase,
13756                                           CPUState *cpu)
13757 {
13758     DisasContext *dc = container_of(dcbase, DisasContext, base);
13759     CPUARMState *env = cpu->env_ptr;
13760     ARMCPU *arm_cpu = arm_env_get_cpu(env);
13761     int bound;
13762
13763     dc->pc = dc->base.pc_first;
13764     dc->condjmp = 0;
13765
13766     dc->aarch64 = 1;
13767     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
13768      * there is no secure EL1, so we route exceptions to EL3.
13769      */
13770     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
13771                                !arm_el_is_aa64(env, 3);
13772     dc->thumb = 0;
13773     dc->sctlr_b = 0;
13774     dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
13775     dc->condexec_mask = 0;
13776     dc->condexec_cond = 0;
13777     dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
13778     dc->tbi0 = ARM_TBFLAG_TBI0(dc->base.tb->flags);
13779     dc->tbi1 = ARM_TBFLAG_TBI1(dc->base.tb->flags);
13780     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
13781 #if !defined(CONFIG_USER_ONLY)
13782     dc->user = (dc->current_el == 0);
13783 #endif
13784     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
13785     dc->sve_excp_el = ARM_TBFLAG_SVEEXC_EL(dc->base.tb->flags);
13786     dc->sve_len = (ARM_TBFLAG_ZCR_LEN(dc->base.tb->flags) + 1) * 16;
13787     dc->vec_len = 0;
13788     dc->vec_stride = 0;
13789     dc->cp_regs = arm_cpu->cp_regs;
13790     dc->features = env->features;
13791
13792     /* Single step state. The code-generation logic here is:
13793      *  SS_ACTIVE == 0:
13794      *   generate code with no special handling for single-stepping (except
13795      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
13796      *   this happens anyway because those changes are all system register or
13797      *   PSTATE writes).
13798      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
13799      *   emit code for one insn
13800      *   emit code to clear PSTATE.SS
13801      *   emit code to generate software step exception for completed step
13802      *   end TB (as usual for having generated an exception)
13803      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
13804      *   emit code to generate a software step exception
13805      *   end the TB
13806      */
13807     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
13808     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
13809     dc->is_ldex = false;
13810     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
13811
13812     /* Bound the number of insns to execute to those left on the page.  */
13813     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
13814
13815     /* If architectural single step active, limit to 1.  */
13816     if (dc->ss_active) {
13817         bound = 1;
13818     }
13819     dc->base.max_insns = MIN(dc->base.max_insns, bound);
13820
13821     init_tmp_a64_array(dc);
13822 }
13823
13824 static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
13825 {
13826     tcg_clear_temp_count();
13827 }
13828
13829 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
13830 {
13831     DisasContext *dc = container_of(dcbase, DisasContext, base);
13832
13833     tcg_gen_insn_start(dc->pc, 0, 0);
13834     dc->insn_start = tcg_last_op();
13835 }
13836
13837 static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
13838                                         const CPUBreakpoint *bp)
13839 {
13840     DisasContext *dc = container_of(dcbase, DisasContext, base);
13841
13842     if (bp->flags & BP_CPU) {
13843         gen_a64_set_pc_im(dc->pc);
13844         gen_helper_check_breakpoints(cpu_env);
13845         /* End the TB early; it likely won't be executed */
13846         dc->base.is_jmp = DISAS_TOO_MANY;
13847     } else {
13848         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
13849         /* The address covered by the breakpoint must be
13850            included in [tb->pc, tb->pc + tb->size) in order
13851            to for it to be properly cleared -- thus we
13852            increment the PC here so that the logic setting
13853            tb->size below does the right thing.  */
13854         dc->pc += 4;
13855         dc->base.is_jmp = DISAS_NORETURN;
13856     }
13857
13858     return true;
13859 }
13860
13861 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
13862 {
13863     DisasContext *dc = container_of(dcbase, DisasContext, base);
13864     CPUARMState *env = cpu->env_ptr;
13865
13866     if (dc->ss_active && !dc->pstate_ss) {
13867         /* Singlestep state is Active-pending.
13868          * If we're in this state at the start of a TB then either
13869          *  a) we just took an exception to an EL which is being debugged
13870          *     and this is the first insn in the exception handler
13871          *  b) debug exceptions were masked and we just unmasked them
13872          *     without changing EL (eg by clearing PSTATE.D)
13873          * In either case we're going to take a swstep exception in the
13874          * "did not step an insn" case, and so the syndrome ISV and EX
13875          * bits should be zero.
13876          */
13877         assert(dc->base.num_insns == 1);
13878         gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
13879                       default_exception_el(dc));
13880         dc->base.is_jmp = DISAS_NORETURN;
13881     } else {
13882         disas_a64_insn(env, dc);
13883     }
13884
13885     dc->base.pc_next = dc->pc;
13886     translator_loop_temp_check(&dc->base);
13887 }
13888
13889 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
13890 {
13891     DisasContext *dc = container_of(dcbase, DisasContext, base);
13892
13893     if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
13894         /* Note that this means single stepping WFI doesn't halt the CPU.
13895          * For conditional branch insns this is harmless unreachable code as
13896          * gen_goto_tb() has already handled emitting the debug exception
13897          * (and thus a tb-jump is not possible when singlestepping).
13898          */
13899         switch (dc->base.is_jmp) {
13900         default:
13901             gen_a64_set_pc_im(dc->pc);
13902             /* fall through */
13903         case DISAS_EXIT:
13904         case DISAS_JUMP:
13905             if (dc->base.singlestep_enabled) {
13906                 gen_exception_internal(EXCP_DEBUG);
13907             } else {
13908                 gen_step_complete_exception(dc);
13909             }
13910             break;
13911         case DISAS_NORETURN:
13912             break;
13913         }
13914     } else {
13915         switch (dc->base.is_jmp) {
13916         case DISAS_NEXT:
13917         case DISAS_TOO_MANY:
13918             gen_goto_tb(dc, 1, dc->pc);
13919             break;
13920         default:
13921         case DISAS_UPDATE:
13922             gen_a64_set_pc_im(dc->pc);
13923             /* fall through */
13924         case DISAS_EXIT:
13925             tcg_gen_exit_tb(NULL, 0);
13926             break;
13927         case DISAS_JUMP:
13928             tcg_gen_lookup_and_goto_ptr();
13929             break;
13930         case DISAS_NORETURN:
13931         case DISAS_SWI:
13932             break;
13933         case DISAS_WFE:
13934             gen_a64_set_pc_im(dc->pc);
13935             gen_helper_wfe(cpu_env);
13936             break;
13937         case DISAS_YIELD:
13938             gen_a64_set_pc_im(dc->pc);
13939             gen_helper_yield(cpu_env);
13940             break;
13941         case DISAS_WFI:
13942         {
13943             /* This is a special case because we don't want to just halt the CPU
13944              * if trying to debug across a WFI.
13945              */
13946             TCGv_i32 tmp = tcg_const_i32(4);
13947
13948             gen_a64_set_pc_im(dc->pc);
13949             gen_helper_wfi(cpu_env, tmp);
13950             tcg_temp_free_i32(tmp);
13951             /* The helper doesn't necessarily throw an exception, but we
13952              * must go back to the main loop to check for interrupts anyway.
13953              */
13954             tcg_gen_exit_tb(NULL, 0);
13955             break;
13956         }
13957         }
13958     }
13959
13960     /* Functions above can change dc->pc, so re-align db->pc_next */
13961     dc->base.pc_next = dc->pc;
13962 }
13963
13964 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
13965                                       CPUState *cpu)
13966 {
13967     DisasContext *dc = container_of(dcbase, DisasContext, base);
13968
13969     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
13970     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
13971 }
13972
13973 const TranslatorOps aarch64_translator_ops = {
13974     .init_disas_context = aarch64_tr_init_disas_context,
13975     .tb_start           = aarch64_tr_tb_start,
13976     .insn_start         = aarch64_tr_insn_start,
13977     .breakpoint_check   = aarch64_tr_breakpoint_check,
13978     .translate_insn     = aarch64_tr_translate_insn,
13979     .tb_stop            = aarch64_tr_tb_stop,
13980     .disas_log          = aarch64_tr_disas_log,
13981 };