target/arm/translate-a64.c

   1 /*
   2  *  AArch64 translation
   3  *
   4  *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
   5  *
   6  * This library is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2 of the License, or (at your option) any later version.
  10  *
  11  * This library is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19 #include "qemu/osdep.h"
  20
  21 #include "cpu.h"
  22 #include "exec/exec-all.h"
  23 #include "tcg-op.h"
  24 #include "tcg-op-gvec.h"
  25 #include "qemu/log.h"
  26 #include "arm_ldst.h"
  27 #include "translate.h"
  28 #include "internals.h"
  29 #include "qemu/host-utils.h"
  30
  31 #include "exec/semihost.h"
  32 #include "exec/gen-icount.h"
  33
  34 #include "exec/helper-proto.h"
  35 #include "exec/helper-gen.h"
  36 #include "exec/log.h"
  37
  38 #include "trace-tcg.h"
  39
  40 static TCGv_i64 cpu_X[32];
  41 static TCGv_i64 cpu_pc;
  42
  43 /* Load/store exclusive handling */
  44 static TCGv_i64 cpu_exclusive_high;
  45 static TCGv_i64 cpu_reg(DisasContext *s, int reg);
  46
  47 static const char *regnames[] = {
  48     "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  49     "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  50     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
  51     "x24", "x25", "x26", "x27", "x28", "x29", "lr", "sp"
  52 };
  53
  54 enum a64_shift_type {
  55     A64_SHIFT_TYPE_LSL = 0,
  56     A64_SHIFT_TYPE_LSR = 1,
  57     A64_SHIFT_TYPE_ASR = 2,
  58     A64_SHIFT_TYPE_ROR = 3
  59 };
  60
/* Table based decoder typedefs - used when the relevant bits for decode
 * are too awkwardly scattered across the instruction (eg SIMD).
 */

/* Signature of a decode handler: it receives the translation context
 * and the 32-bit instruction word.
 */
typedef void AArch64DecodeFn(DisasContext *s, uint32_t insn);

/* One entry of a decode table.
 * NOTE(review): presumably an entry is selected when
 * (insn & mask) == pattern; the lookup code is not in this part of
 * the file, so confirm against it.
 */
typedef struct AArch64DecodeTable {
    uint32_t pattern;
    uint32_t mask;
    AArch64DecodeFn *disas_fn;  /* handler associated with this entry */
} AArch64DecodeTable;
  71
/* Function types for the gen_ functions that call Neon and crypto
 * helpers.  Each name encodes the operand count and width; the "Env"
 * variants additionally take a TCGv_ptr argument.
 * NOTE(review): the role of each operand (destination vs. source) is not
 * visible here - confirm against the helper declarations.
 */
typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenTwo64OpEnvFn(TCGv_i64, TCGv_ptr, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
typedef void CryptoTwoOpFn(TCGv_ptr, TCGv_ptr);
typedef void CryptoThreeOpIntFn(TCGv_ptr, TCGv_ptr, TCGv_i32);
typedef void CryptoThreeOpFn(TCGv_ptr, TCGv_ptr, TCGv_ptr);
  87
  88 /* Note that the gvec expanders operate on offsets + sizes.  */
  89 typedef void GVecGen2Fn(unsigned, uint32_t, uint32_t, uint32_t, uint32_t);
  90 typedef void GVecGen2iFn(unsigned, uint32_t, uint32_t, int64_t,
  91                          uint32_t, uint32_t);
  92 typedef void GVecGen3Fn(unsigned, uint32_t, uint32_t,
  93                         uint32_t, uint32_t, uint32_t);
  94
  95 /* initialize TCG globals.  */
  96 void a64_translate_init(void)
  97 {
  98     int i;
  99
 100     cpu_pc = tcg_global_mem_new_i64(cpu_env,
 101                                     offsetof(CPUARMState, pc),
 102                                     "pc");
 103     for (i = 0; i < 32; i++) {
 104         cpu_X[i] = tcg_global_mem_new_i64(cpu_env,
 105                                           offsetof(CPUARMState, xregs[i]),
 106                                           regnames[i]);
 107     }
 108
 109     cpu_exclusive_high = tcg_global_mem_new_i64(cpu_env,
 110         offsetof(CPUARMState, exclusive_high), "exclusive_high");
 111 }
 112
 113 static inline int get_a64_user_mem_index(DisasContext *s)
 114 {
 115     /* Return the core mmu_idx to use for A64 "unprivileged load/store" insns:
 116      *  if EL1, access as if EL0; otherwise access at current EL
 117      */
 118     ARMMMUIdx useridx;
 119
 120     switch (s->mmu_idx) {
 121     case ARMMMUIdx_S12NSE1:
 122         useridx = ARMMMUIdx_S12NSE0;
 123         break;
 124     case ARMMMUIdx_S1SE1:
 125         useridx = ARMMMUIdx_S1SE0;
 126         break;
 127     case ARMMMUIdx_S2NS:
 128         g_assert_not_reached();
 129     default:
 130         useridx = s->mmu_idx;
 131         break;
 132     }
 133     return arm_to_core_mmu_idx(useridx);
 134 }
 135
 136 void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
 137                             fprintf_function cpu_fprintf, int flags)
 138 {
 139     ARMCPU *cpu = ARM_CPU(cs);
 140     CPUARMState *env = &cpu->env;
 141     uint32_t psr = pstate_read(env);
 142     int i;
 143     int el = arm_current_el(env);
 144     const char *ns_status;
 145
 146     cpu_fprintf(f, "PC=%016"PRIx64"  SP=%016"PRIx64"\n",
 147             env->pc, env->xregs[31]);
 148     for (i = 0; i < 31; i++) {
 149         cpu_fprintf(f, "X%02d=%016"PRIx64, i, env->xregs[i]);
 150         if ((i % 4) == 3) {
 151             cpu_fprintf(f, "\n");
 152         } else {
 153             cpu_fprintf(f, " ");
 154         }
 155     }
 156
 157     if (arm_feature(env, ARM_FEATURE_EL3) && el != 3) {
 158         ns_status = env->cp15.scr_el3 & SCR_NS ? "NS " : "S ";
 159     } else {
 160         ns_status = "";
 161     }
 162
 163     cpu_fprintf(f, "\nPSTATE=%08x %c%c%c%c %sEL%d%c\n",
 164                 psr,
 165                 psr & PSTATE_N ? 'N' : '-',
 166                 psr & PSTATE_Z ? 'Z' : '-',
 167                 psr & PSTATE_C ? 'C' : '-',
 168                 psr & PSTATE_V ? 'V' : '-',
 169                 ns_status,
 170                 el,
 171                 psr & PSTATE_SP ? 'h' : 't');
 172
 173     if (flags & CPU_DUMP_FPU) {
 174         int numvfpregs = 32;
 175         for (i = 0; i < numvfpregs; i++) {
 176             uint64_t *q = aa64_vfp_qreg(env, i);
 177             uint64_t vlo = q[0];
 178             uint64_t vhi = q[1];
 179             cpu_fprintf(f, "q%02d=%016" PRIx64 ":%016" PRIx64 "%c",
 180                         i, vhi, vlo, (i & 1 ? '\n' : ' '));
 181         }
 182         cpu_fprintf(f, "FPCR: %08x  FPSR: %08x\n",
 183                     vfp_get_fpcr(env), vfp_get_fpsr(env));
 184     }
 185 }
 186
 187 void gen_a64_set_pc_im(uint64_t val)
 188 {
 189     tcg_gen_movi_i64(cpu_pc, val);
 190 }
 191
 192 /* Load the PC from a generic TCG variable.
 193  *
 194  * If address tagging is enabled via the TCR TBI bits, then loading
 195  * an address into the PC will clear out any tag in the it:
 196  *  + for EL2 and EL3 there is only one TBI bit, and if it is set
 197  *    then the address is zero-extended, clearing bits [63:56]
 198  *  + for EL0 and EL1, TBI0 controls addresses with bit 55 == 0
 199  *    and TBI1 controls addressses with bit 55 == 1.
 200  *    If the appropriate TBI bit is set for the address then
 201  *    the address is sign-extended from bit 55 into bits [63:56]
 202  *
 203  * We can avoid doing this for relative-branches, because the
 204  * PC + offset can never overflow into the tag bits (assuming
 205  * that virtual addresses are less than 56 bits wide, as they
 206  * are currently), but we must handle it for branch-to-register.
 207  */
 208 static void gen_a64_set_pc(DisasContext *s, TCGv_i64 src)
 209 {
 210
 211     if (s->current_el <= 1) {
 212         /* Test if NEITHER or BOTH TBI values are set.  If so, no need to
 213          * examine bit 55 of address, can just generate code.
 214          * If mixed, then test via generated code
 215          */
 216         if (s->tbi0 && s->tbi1) {
 217             TCGv_i64 tmp_reg = tcg_temp_new_i64();
 218             /* Both bits set, sign extension from bit 55 into [63:56] will
 219              * cover both cases
 220              */
 221             tcg_gen_shli_i64(tmp_reg, src, 8);
 222             tcg_gen_sari_i64(cpu_pc, tmp_reg, 8);
 223             tcg_temp_free_i64(tmp_reg);
 224         } else if (!s->tbi0 && !s->tbi1) {
 225             /* Neither bit set, just load it as-is */
 226             tcg_gen_mov_i64(cpu_pc, src);
 227         } else {
 228             TCGv_i64 tcg_tmpval = tcg_temp_new_i64();
 229             TCGv_i64 tcg_bit55  = tcg_temp_new_i64();
 230             TCGv_i64 tcg_zero   = tcg_const_i64(0);
 231
 232             tcg_gen_andi_i64(tcg_bit55, src, (1ull << 55));
 233
 234             if (s->tbi0) {
 235                 /* tbi0==1, tbi1==0, so 0-fill upper byte if bit 55 = 0 */
 236                 tcg_gen_andi_i64(tcg_tmpval, src,
 237                                  0x00FFFFFFFFFFFFFFull);
 238                 tcg_gen_movcond_i64(TCG_COND_EQ, cpu_pc, tcg_bit55, tcg_zero,
 239                                     tcg_tmpval, src);
 240             } else {
 241                 /* tbi0==0, tbi1==1, so 1-fill upper byte if bit 55 = 1 */
 242                 tcg_gen_ori_i64(tcg_tmpval, src,
 243                                 0xFF00000000000000ull);
 244                 tcg_gen_movcond_i64(TCG_COND_NE, cpu_pc, tcg_bit55, tcg_zero,
 245                                     tcg_tmpval, src);
 246             }
 247             tcg_temp_free_i64(tcg_zero);
 248             tcg_temp_free_i64(tcg_bit55);
 249             tcg_temp_free_i64(tcg_tmpval);
 250         }
 251     } else {  /* EL > 1 */
 252         if (s->tbi0) {
 253             /* Force tag byte to all zero */
 254             tcg_gen_andi_i64(cpu_pc, src, 0x00FFFFFFFFFFFFFFull);
 255         } else {
 256             /* Load unmodified address */
 257             tcg_gen_mov_i64(cpu_pc, src);
 258         }
 259     }
 260 }
 261
/* A condition test widened to 64 bits.  Filled in by a64_test_cc()
 * and released with a64_free_cc().
 */
typedef struct DisasCompare64 {
    TCGCond cond;   /* condition, copied from the 32-bit DisasCompare */
    TCGv_i64 value; /* temp holding the sign-extended 32-bit value */
} DisasCompare64;
 266
 267 static void a64_test_cc(DisasCompare64 *c64, int cc)
 268 {
 269     DisasCompare c32;
 270
 271     arm_test_cc(&c32, cc);
 272
 273     /* Sign-extend the 32-bit value so that the GE/LT comparisons work
 274        * properly.  The NE/EQ comparisons are also fine with this choice.  */
 275     c64->cond = c32.cond;
 276     c64->value = tcg_temp_new_i64();
 277     tcg_gen_ext_i32_i64(c64->value, c32.value);
 278
 279     arm_free_cc(&c32);
 280 }
 281
 282 static void a64_free_cc(DisasCompare64 *c64)
 283 {
 284     tcg_temp_free_i64(c64->value);
 285 }
 286
 287 static void gen_exception_internal(int excp)
 288 {
 289     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 290
 291     assert(excp_is_internal(excp));
 292     gen_helper_exception_internal(cpu_env, tcg_excp);
 293     tcg_temp_free_i32(tcg_excp);
 294 }
 295
 296 static void gen_exception(int excp, uint32_t syndrome, uint32_t target_el)
 297 {
 298     TCGv_i32 tcg_excp = tcg_const_i32(excp);
 299     TCGv_i32 tcg_syn = tcg_const_i32(syndrome);
 300     TCGv_i32 tcg_el = tcg_const_i32(target_el);
 301
 302     gen_helper_exception_with_syndrome(cpu_env, tcg_excp,
 303                                        tcg_syn, tcg_el);
 304     tcg_temp_free_i32(tcg_el);
 305     tcg_temp_free_i32(tcg_syn);
 306     tcg_temp_free_i32(tcg_excp);
 307 }
 308
 309 static void gen_exception_internal_insn(DisasContext *s, int offset, int excp)
 310 {
 311     gen_a64_set_pc_im(s->pc - offset);
 312     gen_exception_internal(excp);
 313     s->base.is_jmp = DISAS_NORETURN;
 314 }
 315
 316 static void gen_exception_insn(DisasContext *s, int offset, int excp,
 317                                uint32_t syndrome, uint32_t target_el)
 318 {
 319     gen_a64_set_pc_im(s->pc - offset);
 320     gen_exception(excp, syndrome, target_el);
 321     s->base.is_jmp = DISAS_NORETURN;
 322 }
 323
 324 static void gen_ss_advance(DisasContext *s)
 325 {
 326     /* If the singlestep state is Active-not-pending, advance to
 327      * Active-pending.
 328      */
 329     if (s->ss_active) {
 330         s->pstate_ss = 0;
 331         gen_helper_clear_pstate_ss(cpu_env);
 332     }
 333 }
 334
 335 static void gen_step_complete_exception(DisasContext *s)
 336 {
 337     /* We just completed step of an insn. Move from Active-not-pending
 338      * to Active-pending, and then also take the swstep exception.
 339      * This corresponds to making the (IMPDEF) choice to prioritize
 340      * swstep exceptions over asynchronous exceptions taken to an exception
 341      * level where debug is disabled. This choice has the advantage that
 342      * we do not need to maintain internal state corresponding to the
 343      * ISV/EX syndrome bits between completion of the step and generation
 344      * of the exception, and our syndrome information is always correct.
 345      */
 346     gen_ss_advance(s);
 347     gen_exception(EXCP_UDEF, syn_swstep(s->ss_same_el, 1, s->is_ldex),
 348                   default_exception_el(s));
 349     s->base.is_jmp = DISAS_NORETURN;
 350 }
 351
 352 static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
 353 {
 354     /* No direct tb linking with singlestep (either QEMU's or the ARM
 355      * debug architecture kind) or deterministic io
 356      */
 357     if (s->base.singlestep_enabled || s->ss_active ||
 358         (tb_cflags(s->base.tb) & CF_LAST_IO)) {
 359         return false;
 360     }
 361
 362 #ifndef CONFIG_USER_ONLY
 363     /* Only link tbs from inside the same guest page */
 364     if ((s->base.tb->pc & TARGET_PAGE_MASK) != (dest & TARGET_PAGE_MASK)) {
 365         return false;
 366     }
 367 #endif
 368
 369     return true;
 370 }
 371
 372 static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
 373 {
 374     TranslationBlock *tb;
 375
 376     tb = s->base.tb;
 377     if (use_goto_tb(s, n, dest)) {
 378         tcg_gen_goto_tb(n);
 379         gen_a64_set_pc_im(dest);
 380         tcg_gen_exit_tb((intptr_t)tb + n);
 381         s->base.is_jmp = DISAS_NORETURN;
 382     } else {
 383         gen_a64_set_pc_im(dest);
 384         if (s->ss_active) {
 385             gen_step_complete_exception(s);
 386         } else if (s->base.singlestep_enabled) {
 387             gen_exception_internal(EXCP_DEBUG);
 388         } else {
 389             tcg_gen_lookup_and_goto_ptr();
 390             s->base.is_jmp = DISAS_NORETURN;
 391         }
 392     }
 393 }
 394
 395 static void unallocated_encoding(DisasContext *s)
 396 {
 397     /* Unallocated and reserved encodings are uncategorized */
 398     gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(),
 399                        default_exception_el(s));
 400 }
 401
 402 #define unsupported_encoding(s, insn)                                    \
 403     do {                                                                 \
 404         qemu_log_mask(LOG_UNIMP,                                         \
 405                       "%s:%d: unsupported instruction encoding 0x%08x "  \
 406                       "at pc=%016" PRIx64 "\n",                          \
 407                       __FILE__, __LINE__, insn, s->pc - 4);              \
 408         unallocated_encoding(s);                                         \
 409     } while (0)
 410
 411 static void init_tmp_a64_array(DisasContext *s)
 412 {
 413 #ifdef CONFIG_DEBUG_TCG
 414     memset(s->tmp_a64, 0, sizeof(s->tmp_a64));
 415 #endif
 416     s->tmp_a64_count = 0;
 417 }
 418
 419 static void free_tmp_a64(DisasContext *s)
 420 {
 421     int i;
 422     for (i = 0; i < s->tmp_a64_count; i++) {
 423         tcg_temp_free_i64(s->tmp_a64[i]);
 424     }
 425     init_tmp_a64_array(s);
 426 }
 427
 428 static TCGv_i64 new_tmp_a64(DisasContext *s)
 429 {
 430     assert(s->tmp_a64_count < TMP_A64_MAX);
 431     return s->tmp_a64[s->tmp_a64_count++] = tcg_temp_new_i64();
 432 }
 433
 434 static TCGv_i64 new_tmp_a64_zero(DisasContext *s)
 435 {
 436     TCGv_i64 t = new_tmp_a64(s);
 437     tcg_gen_movi_i64(t, 0);
 438     return t;
 439 }
 440
 441 /*
 442  * Register access functions
 443  *
 444  * These functions are used for directly accessing a register in where
 445  * changes to the final register value are likely to be made. If you
 446  * need to use a register for temporary calculation (e.g. index type
 447  * operations) use the read_* form.
 448  *
 449  * B1.2.1 Register mappings
 450  *
 451  * In instruction register encoding 31 can refer to ZR (zero register) or
 452  * the SP (stack pointer) depending on context. In QEMU's case we map SP
 453  * to cpu_X[31] and ZR accesses to a temporary which can be discarded.
 454  * This is the point of the _sp forms.
 455  */
 456 static TCGv_i64 cpu_reg(DisasContext *s, int reg)
 457 {
 458     if (reg == 31) {
 459         return new_tmp_a64_zero(s);
 460     } else {
 461         return cpu_X[reg];
 462     }
 463 }
 464
 465 /* register access for when 31 == SP */
 466 static TCGv_i64 cpu_reg_sp(DisasContext *s, int reg)
 467 {
 468     return cpu_X[reg];
 469 }
 470
 471 /* read a cpu register in 32bit/64bit mode. Returns a TCGv_i64
 472  * representing the register contents. This TCGv is an auto-freed
 473  * temporary so it need not be explicitly freed, and may be modified.
 474  */
 475 static TCGv_i64 read_cpu_reg(DisasContext *s, int reg, int sf)
 476 {
 477     TCGv_i64 v = new_tmp_a64(s);
 478     if (reg != 31) {
 479         if (sf) {
 480             tcg_gen_mov_i64(v, cpu_X[reg]);
 481         } else {
 482             tcg_gen_ext32u_i64(v, cpu_X[reg]);
 483         }
 484     } else {
 485         tcg_gen_movi_i64(v, 0);
 486     }
 487     return v;
 488 }
 489
 490 static TCGv_i64 read_cpu_reg_sp(DisasContext *s, int reg, int sf)
 491 {
 492     TCGv_i64 v = new_tmp_a64(s);
 493     if (sf) {
 494         tcg_gen_mov_i64(v, cpu_X[reg]);
 495     } else {
 496         tcg_gen_ext32u_i64(v, cpu_X[reg]);
 497     }
 498     return v;
 499 }
 500
 501 /* We should have at some point before trying to access an FP register
 502  * done the necessary access check, so assert that
 503  * (a) we did the check and
 504  * (b) we didn't then just plough ahead anyway if it failed.
 505  * Print the instruction pattern in the abort message so we can figure
 506  * out what we need to fix if a user encounters this problem in the wild.
 507  */
 508 static inline void assert_fp_access_checked(DisasContext *s)
 509 {
 510 #ifdef CONFIG_DEBUG_TCG
 511     if (unlikely(!s->fp_access_checked || s->fp_excp_el)) {
 512         fprintf(stderr, "target-arm: FP access check missing for "
 513                 "instruction 0x%08x\n", s->insn);
 514         abort();
 515     }
 516 #endif
 517 }
 518
 519 /* Return the offset into CPUARMState of an element of specified
 520  * size, 'element' places in from the least significant end of
 521  * the FP/vector register Qn.
 522  */
 523 static inline int vec_reg_offset(DisasContext *s, int regno,
 524                                  int element, TCGMemOp size)
 525 {
 526     int offs = 0;
 527 #ifdef HOST_WORDS_BIGENDIAN
 528     /* This is complicated slightly because vfp.zregs[n].d[0] is
 529      * still the low half and vfp.zregs[n].d[1] the high half
 530      * of the 128 bit vector, even on big endian systems.
 531      * Calculate the offset assuming a fully bigendian 128 bits,
 532      * then XOR to account for the order of the two 64 bit halves.
 533      */
 534     offs += (16 - ((element + 1) * (1 << size)));
 535     offs ^= 8;
 536 #else
 537     offs += element * (1 << size);
 538 #endif
 539     offs += offsetof(CPUARMState, vfp.zregs[regno]);
 540     assert_fp_access_checked(s);
 541     return offs;
 542 }
 543
 544 /* Return the offset info CPUARMState of the "whole" vector register Qn.  */
 545 static inline int vec_full_reg_offset(DisasContext *s, int regno)
 546 {
 547     assert_fp_access_checked(s);
 548     return offsetof(CPUARMState, vfp.zregs[regno]);
 549 }
 550
 551 /* Return a newly allocated pointer to the vector register.  */
 552 static TCGv_ptr vec_full_reg_ptr(DisasContext *s, int regno)
 553 {
 554     TCGv_ptr ret = tcg_temp_new_ptr();
 555     tcg_gen_addi_ptr(ret, cpu_env, vec_full_reg_offset(s, regno));
 556     return ret;
 557 }
 558
 559 /* Return the byte size of the "whole" vector register, VL / 8.  */
 560 static inline int vec_full_reg_size(DisasContext *s)
 561 {
 562     /* FIXME SVE: We should put the composite ZCR_EL* value into tb->flags.
 563        In the meantime this is just the AdvSIMD length of 128.  */
 564     return 128 / 8;
 565 }
 566
 567 /* Return the offset into CPUARMState of a slice (from
 568  * the least significant end) of FP register Qn (ie
 569  * Dn, Sn, Hn or Bn).
 570  * (Note that this is not the same mapping as for A32; see cpu.h)
 571  */
 572 static inline int fp_reg_offset(DisasContext *s, int regno, TCGMemOp size)
 573 {
 574     return vec_reg_offset(s, regno, 0, size);
 575 }
 576
 577 /* Offset of the high half of the 128 bit vector Qn */
 578 static inline int fp_reg_hi_offset(DisasContext *s, int regno)
 579 {
 580     return vec_reg_offset(s, regno, 1, MO_64);
 581 }
 582
 583 /* Convenience accessors for reading and writing single and double
 584  * FP registers. Writing clears the upper parts of the associated
 585  * 128 bit vector register, as required by the architecture.
 586  * Note that unlike the GP register accessors, the values returned
 587  * by the read functions must be manually freed.
 588  */
 589 static TCGv_i64 read_fp_dreg(DisasContext *s, int reg)
 590 {
 591     TCGv_i64 v = tcg_temp_new_i64();
 592
 593     tcg_gen_ld_i64(v, cpu_env, fp_reg_offset(s, reg, MO_64));
 594     return v;
 595 }
 596
 597 static TCGv_i32 read_fp_sreg(DisasContext *s, int reg)
 598 {
 599     TCGv_i32 v = tcg_temp_new_i32();
 600
 601     tcg_gen_ld_i32(v, cpu_env, fp_reg_offset(s, reg, MO_32));
 602     return v;
 603 }
 604
 605 /* Clear the bits above an N-bit vector, for N = (is_q ? 128 : 64).
 606  * If SVE is not enabled, then there are only 128 bits in the vector.
 607  */
 608 static void clear_vec_high(DisasContext *s, bool is_q, int rd)
 609 {
 610     unsigned ofs = fp_reg_offset(s, rd, MO_64);
 611     unsigned vsz = vec_full_reg_size(s);
 612
 613     if (!is_q) {
 614         TCGv_i64 tcg_zero = tcg_const_i64(0);
 615         tcg_gen_st_i64(tcg_zero, cpu_env, ofs + 8);
 616         tcg_temp_free_i64(tcg_zero);
 617     }
 618     if (vsz > 16) {
 619         tcg_gen_gvec_dup8i(ofs + 16, vsz - 16, vsz - 16, 0);
 620     }
 621 }
 622
 623 static void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v)
 624 {
 625     unsigned ofs = fp_reg_offset(s, reg, MO_64);
 626
 627     tcg_gen_st_i64(v, cpu_env, ofs);
 628     clear_vec_high(s, false, reg);
 629 }
 630
 631 static void write_fp_sreg(DisasContext *s, int reg, TCGv_i32 v)
 632 {
 633     TCGv_i64 tmp = tcg_temp_new_i64();
 634
 635     tcg_gen_extu_i32_i64(tmp, v);
 636     write_fp_dreg(s, reg, tmp);
 637     tcg_temp_free_i64(tmp);
 638 }
 639
 640 static TCGv_ptr get_fpstatus_ptr(bool is_f16)
 641 {
 642     TCGv_ptr statusptr = tcg_temp_new_ptr();
 643     int offset;
 644
 645     /* In A64 all instructions (both FP and Neon) use the FPCR; there
 646      * is no equivalent of the A32 Neon "standard FPSCR value".
 647      * However half-precision operations operate under a different
 648      * FZ16 flag and use vfp.fp_status_f16 instead of vfp.fp_status.
 649      */
 650     if (is_f16) {
 651         offset = offsetof(CPUARMState, vfp.fp_status_f16);
 652     } else {
 653         offset = offsetof(CPUARMState, vfp.fp_status);
 654     }
 655     tcg_gen_addi_ptr(statusptr, cpu_env, offset);
 656     return statusptr;
 657 }
 658
 659 /* Expand a 2-operand AdvSIMD vector operation using an expander function.  */
 660 static void gen_gvec_fn2(DisasContext *s, bool is_q, int rd, int rn,
 661                          GVecGen2Fn *gvec_fn, int vece)
 662 {
 663     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 664             is_q ? 16 : 8, vec_full_reg_size(s));
 665 }
 666
 667 /* Expand a 2-operand + immediate AdvSIMD vector operation using
 668  * an expander function.
 669  */
 670 static void gen_gvec_fn2i(DisasContext *s, bool is_q, int rd, int rn,
 671                           int64_t imm, GVecGen2iFn *gvec_fn, int vece)
 672 {
 673     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 674             imm, is_q ? 16 : 8, vec_full_reg_size(s));
 675 }
 676
 677 /* Expand a 3-operand AdvSIMD vector operation using an expander function.  */
 678 static void gen_gvec_fn3(DisasContext *s, bool is_q, int rd, int rn, int rm,
 679                          GVecGen3Fn *gvec_fn, int vece)
 680 {
 681     gvec_fn(vece, vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 682             vec_full_reg_offset(s, rm), is_q ? 16 : 8, vec_full_reg_size(s));
 683 }
 684
 685 /* Expand a 2-operand + immediate AdvSIMD vector operation using
 686  * an op descriptor.
 687  */
 688 static void gen_gvec_op2i(DisasContext *s, bool is_q, int rd,
 689                           int rn, int64_t imm, const GVecGen2i *gvec_op)
 690 {
 691     tcg_gen_gvec_2i(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 692                     is_q ? 16 : 8, vec_full_reg_size(s), imm, gvec_op);
 693 }
 694
 695 /* Expand a 3-operand AdvSIMD vector operation using an op descriptor.  */
 696 static void gen_gvec_op3(DisasContext *s, bool is_q, int rd,
 697                          int rn, int rm, const GVecGen3 *gvec_op)
 698 {
 699     tcg_gen_gvec_3(vec_full_reg_offset(s, rd), vec_full_reg_offset(s, rn),
 700                    vec_full_reg_offset(s, rm), is_q ? 16 : 8,
 701                    vec_full_reg_size(s), gvec_op);
 702 }
 703
 704 /* Expand a 3-operand + env pointer operation using
 705  * an out-of-line helper.
 706  */
 707 static void gen_gvec_op3_env(DisasContext *s, bool is_q, int rd,
 708                              int rn, int rm, gen_helper_gvec_3_ptr *fn)
 709 {
 710     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 711                        vec_full_reg_offset(s, rn),
 712                        vec_full_reg_offset(s, rm), cpu_env,
 713                        is_q ? 16 : 8, vec_full_reg_size(s), 0, fn);
 714 }
 715
 716 /* Expand a 3-operand + fpstatus pointer + simd data value operation using
 717  * an out-of-line helper.
 718  */
 719 static void gen_gvec_op3_fpst(DisasContext *s, bool is_q, int rd, int rn,
 720                               int rm, bool is_fp16, int data,
 721                               gen_helper_gvec_3_ptr *fn)
 722 {
 723     TCGv_ptr fpst = get_fpstatus_ptr(is_fp16);
 724     tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
 725                        vec_full_reg_offset(s, rn),
 726                        vec_full_reg_offset(s, rm), fpst,
 727                        is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
 728     tcg_temp_free_ptr(fpst);
 729 }
 730
 731 /* Set ZF and NF based on a 64 bit result. This is alas fiddlier
 732  * than the 32 bit equivalent.
 733  */
 734 static inline void gen_set_NZ64(TCGv_i64 result)
 735 {
 736     tcg_gen_extr_i64_i32(cpu_ZF, cpu_NF, result);
 737     tcg_gen_or_i32(cpu_ZF, cpu_ZF, cpu_NF);
 738 }
 739
 740 /* Set NZCV as for a logical operation: NZ as per result, CV cleared. */
 741 static inline void gen_logic_CC(int sf, TCGv_i64 result)
 742 {
 743     if (sf) {
 744         gen_set_NZ64(result);
 745     } else {
 746         tcg_gen_extrl_i64_i32(cpu_ZF, result);
 747         tcg_gen_mov_i32(cpu_NF, cpu_ZF);
 748     }
 749     tcg_gen_movi_i32(cpu_CF, 0);
 750     tcg_gen_movi_i32(cpu_VF, 0);
 751 }
 752
 753 /* dest = T0 + T1; compute C, N, V and Z flags */
 754 static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 755 {
 756     if (sf) {
 757         TCGv_i64 result, flag, tmp;
 758         result = tcg_temp_new_i64();
 759         flag = tcg_temp_new_i64();
 760         tmp = tcg_temp_new_i64();
 761
 762         tcg_gen_movi_i64(tmp, 0);
 763         tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
 764
 765         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 766
 767         gen_set_NZ64(result);
 768
 769         tcg_gen_xor_i64(flag, result, t0);
 770         tcg_gen_xor_i64(tmp, t0, t1);
 771         tcg_gen_andc_i64(flag, flag, tmp);
 772         tcg_temp_free_i64(tmp);
 773         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 774
 775         tcg_gen_mov_i64(dest, result);
 776         tcg_temp_free_i64(result);
 777         tcg_temp_free_i64(flag);
 778     } else {
 779         /* 32 bit arithmetic */
 780         TCGv_i32 t0_32 = tcg_temp_new_i32();
 781         TCGv_i32 t1_32 = tcg_temp_new_i32();
 782         TCGv_i32 tmp = tcg_temp_new_i32();
 783
 784         tcg_gen_movi_i32(tmp, 0);
 785         tcg_gen_extrl_i64_i32(t0_32, t0);
 786         tcg_gen_extrl_i64_i32(t1_32, t1);
 787         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, t1_32, tmp);
 788         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 789         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 790         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 791         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 792         tcg_gen_extu_i32_i64(dest, cpu_NF);
 793
 794         tcg_temp_free_i32(tmp);
 795         tcg_temp_free_i32(t0_32);
 796         tcg_temp_free_i32(t1_32);
 797     }
 798 }
 799
 800 /* dest = T0 - T1; compute C, N, V and Z flags */
 801 static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 802 {
 803     if (sf) {
 804         /* 64 bit arithmetic */
 805         TCGv_i64 result, flag, tmp;
 806
 807         result = tcg_temp_new_i64();
 808         flag = tcg_temp_new_i64();
 809         tcg_gen_sub_i64(result, t0, t1);
 810
 811         gen_set_NZ64(result);
 812
 813         tcg_gen_setcond_i64(TCG_COND_GEU, flag, t0, t1);
 814         tcg_gen_extrl_i64_i32(cpu_CF, flag);
 815
 816         tcg_gen_xor_i64(flag, result, t0);
 817         tmp = tcg_temp_new_i64();
 818         tcg_gen_xor_i64(tmp, t0, t1);
 819         tcg_gen_and_i64(flag, flag, tmp);
 820         tcg_temp_free_i64(tmp);
 821         tcg_gen_extrh_i64_i32(cpu_VF, flag);
 822         tcg_gen_mov_i64(dest, result);
 823         tcg_temp_free_i64(flag);
 824         tcg_temp_free_i64(result);
 825     } else {
 826         /* 32 bit arithmetic */
 827         TCGv_i32 t0_32 = tcg_temp_new_i32();
 828         TCGv_i32 t1_32 = tcg_temp_new_i32();
 829         TCGv_i32 tmp;
 830
 831         tcg_gen_extrl_i64_i32(t0_32, t0);
 832         tcg_gen_extrl_i64_i32(t1_32, t1);
 833         tcg_gen_sub_i32(cpu_NF, t0_32, t1_32);
 834         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 835         tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0_32, t1_32);
 836         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 837         tmp = tcg_temp_new_i32();
 838         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 839         tcg_temp_free_i32(t0_32);
 840         tcg_temp_free_i32(t1_32);
 841         tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
 842         tcg_temp_free_i32(tmp);
 843         tcg_gen_extu_i32_i64(dest, cpu_NF);
 844     }
 845 }
 846
 847 /* dest = T0 + T1 + CF; do not compute flags. */
 848 static void gen_adc(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 849 {
 850     TCGv_i64 flag = tcg_temp_new_i64();
 851     tcg_gen_extu_i32_i64(flag, cpu_CF);
 852     tcg_gen_add_i64(dest, t0, t1);
 853     tcg_gen_add_i64(dest, dest, flag);
 854     tcg_temp_free_i64(flag);
 855
 856     if (!sf) {
 857         tcg_gen_ext32u_i64(dest, dest);
 858     }
 859 }
 860
 861 /* dest = T0 + T1 + CF; compute C, N, V and Z flags. */
 862 static void gen_adc_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
 863 {
 864     if (sf) {
 865         TCGv_i64 result, cf_64, vf_64, tmp;
 866         result = tcg_temp_new_i64();
 867         cf_64 = tcg_temp_new_i64();
 868         vf_64 = tcg_temp_new_i64();
 869         tmp = tcg_const_i64(0);
 870
 871         tcg_gen_extu_i32_i64(cf_64, cpu_CF);
 872         tcg_gen_add2_i64(result, cf_64, t0, tmp, cf_64, tmp);
 873         tcg_gen_add2_i64(result, cf_64, result, cf_64, t1, tmp);
 874         tcg_gen_extrl_i64_i32(cpu_CF, cf_64);
 875         gen_set_NZ64(result);
 876
 877         tcg_gen_xor_i64(vf_64, result, t0);
 878         tcg_gen_xor_i64(tmp, t0, t1);
 879         tcg_gen_andc_i64(vf_64, vf_64, tmp);
 880         tcg_gen_extrh_i64_i32(cpu_VF, vf_64);
 881
 882         tcg_gen_mov_i64(dest, result);
 883
 884         tcg_temp_free_i64(tmp);
 885         tcg_temp_free_i64(vf_64);
 886         tcg_temp_free_i64(cf_64);
 887         tcg_temp_free_i64(result);
 888     } else {
 889         TCGv_i32 t0_32, t1_32, tmp;
 890         t0_32 = tcg_temp_new_i32();
 891         t1_32 = tcg_temp_new_i32();
 892         tmp = tcg_const_i32(0);
 893
 894         tcg_gen_extrl_i64_i32(t0_32, t0);
 895         tcg_gen_extrl_i64_i32(t1_32, t1);
 896         tcg_gen_add2_i32(cpu_NF, cpu_CF, t0_32, tmp, cpu_CF, tmp);
 897         tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1_32, tmp);
 898
 899         tcg_gen_mov_i32(cpu_ZF, cpu_NF);
 900         tcg_gen_xor_i32(cpu_VF, cpu_NF, t0_32);
 901         tcg_gen_xor_i32(tmp, t0_32, t1_32);
 902         tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
 903         tcg_gen_extu_i32_i64(dest, cpu_NF);
 904
 905         tcg_temp_free_i32(tmp);
 906         tcg_temp_free_i32(t1_32);
 907         tcg_temp_free_i32(t0_32);
 908     }
 909 }
 910
 911 /*
 912  * Load/Store generators
 913  */
 914
 915 /*
 916  * Store from GPR register to memory.
 917  */
 918 static void do_gpr_st_memidx(DisasContext *s, TCGv_i64 source,
 919                              TCGv_i64 tcg_addr, int size, int memidx,
 920                              bool iss_valid,
 921                              unsigned int iss_srt,
 922                              bool iss_sf, bool iss_ar)
 923 {
 924     g_assert(size <= 3);
 925     tcg_gen_qemu_st_i64(source, tcg_addr, memidx, s->be_data + size);
 926
 927     if (iss_valid) {
 928         uint32_t syn;
 929
 930         syn = syn_data_abort_with_iss(0,
 931                                       size,
 932                                       false,
 933                                       iss_srt,
 934                                       iss_sf,
 935                                       iss_ar,
 936                                       0, 0, 0, 0, 0, false);
 937         disas_set_insn_syndrome(s, syn);
 938     }
 939 }
 940
 941 static void do_gpr_st(DisasContext *s, TCGv_i64 source,
 942                       TCGv_i64 tcg_addr, int size,
 943                       bool iss_valid,
 944                       unsigned int iss_srt,
 945                       bool iss_sf, bool iss_ar)
 946 {
 947     do_gpr_st_memidx(s, source, tcg_addr, size, get_mem_index(s),
 948                      iss_valid, iss_srt, iss_sf, iss_ar);
 949 }
 950
 951 /*
 952  * Load from memory to GPR register
 953  */
 954 static void do_gpr_ld_memidx(DisasContext *s,
 955                              TCGv_i64 dest, TCGv_i64 tcg_addr,
 956                              int size, bool is_signed,
 957                              bool extend, int memidx,
 958                              bool iss_valid, unsigned int iss_srt,
 959                              bool iss_sf, bool iss_ar)
 960 {
 961     TCGMemOp memop = s->be_data + size;
 962
 963     g_assert(size <= 3);
 964
 965     if (is_signed) {
 966         memop += MO_SIGN;
 967     }
 968
 969     tcg_gen_qemu_ld_i64(dest, tcg_addr, memidx, memop);
 970
 971     if (extend && is_signed) {
 972         g_assert(size < 3);
 973         tcg_gen_ext32u_i64(dest, dest);
 974     }
 975
 976     if (iss_valid) {
 977         uint32_t syn;
 978
 979         syn = syn_data_abort_with_iss(0,
 980                                       size,
 981                                       is_signed,
 982                                       iss_srt,
 983                                       iss_sf,
 984                                       iss_ar,
 985                                       0, 0, 0, 0, 0, false);
 986         disas_set_insn_syndrome(s, syn);
 987     }
 988 }
 989
 990 static void do_gpr_ld(DisasContext *s,
 991                       TCGv_i64 dest, TCGv_i64 tcg_addr,
 992                       int size, bool is_signed, bool extend,
 993                       bool iss_valid, unsigned int iss_srt,
 994                       bool iss_sf, bool iss_ar)
 995 {
 996     do_gpr_ld_memidx(s, dest, tcg_addr, size, is_signed, extend,
 997                      get_mem_index(s),
 998                      iss_valid, iss_srt, iss_sf, iss_ar);
 999 }
1000
1001 /*
1002  * Store from FP register to memory
1003  */
1004 static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
1005 {
1006     /* This writes the bottom N bits of a 128 bit wide vector to memory */
1007     TCGv_i64 tmp = tcg_temp_new_i64();
1008     tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
1009     if (size < 4) {
1010         tcg_gen_qemu_st_i64(tmp, tcg_addr, get_mem_index(s),
1011                             s->be_data + size);
1012     } else {
1013         bool be = s->be_data == MO_BE;
1014         TCGv_i64 tcg_hiaddr = tcg_temp_new_i64();
1015
1016         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
1017         tcg_gen_qemu_st_i64(tmp, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
1018                             s->be_data | MO_Q);
1019         tcg_gen_ld_i64(tmp, cpu_env, fp_reg_hi_offset(s, srcidx));
1020         tcg_gen_qemu_st_i64(tmp, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
1021                             s->be_data | MO_Q);
1022         tcg_temp_free_i64(tcg_hiaddr);
1023     }
1024
1025     tcg_temp_free_i64(tmp);
1026 }
1027
1028 /*
1029  * Load from memory to FP register
1030  */
1031 static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
1032 {
1033     /* This always zero-extends and writes to a full 128 bit wide vector */
1034     TCGv_i64 tmplo = tcg_temp_new_i64();
1035     TCGv_i64 tmphi;
1036
1037     if (size < 4) {
1038         TCGMemOp memop = s->be_data + size;
1039         tmphi = tcg_const_i64(0);
1040         tcg_gen_qemu_ld_i64(tmplo, tcg_addr, get_mem_index(s), memop);
1041     } else {
1042         bool be = s->be_data == MO_BE;
1043         TCGv_i64 tcg_hiaddr;
1044
1045         tmphi = tcg_temp_new_i64();
1046         tcg_hiaddr = tcg_temp_new_i64();
1047
1048         tcg_gen_addi_i64(tcg_hiaddr, tcg_addr, 8);
1049         tcg_gen_qemu_ld_i64(tmplo, be ? tcg_hiaddr : tcg_addr, get_mem_index(s),
1050                             s->be_data | MO_Q);
1051         tcg_gen_qemu_ld_i64(tmphi, be ? tcg_addr : tcg_hiaddr, get_mem_index(s),
1052                             s->be_data | MO_Q);
1053         tcg_temp_free_i64(tcg_hiaddr);
1054     }
1055
1056     tcg_gen_st_i64(tmplo, cpu_env, fp_reg_offset(s, destidx, MO_64));
1057     tcg_gen_st_i64(tmphi, cpu_env, fp_reg_hi_offset(s, destidx));
1058
1059     tcg_temp_free_i64(tmplo);
1060     tcg_temp_free_i64(tmphi);
1061
1062     clear_vec_high(s, true, destidx);
1063 }
1064
1065 /*
1066  * Vector load/store helpers.
1067  *
1068  * The principal difference between this and a FP load is that we don't
1069  * zero extend as we are filling a partial chunk of the vector register.
1070  * These functions don't support 128 bit loads/stores, which would be
1071  * normal load/store operations.
1072  *
1073  * The _i32 versions are useful when operating on 32 bit quantities
1074  * (eg for floating point single or using Neon helper functions).
1075  */
1076
1077 /* Get value of an element within a vector register */
1078 static void read_vec_element(DisasContext *s, TCGv_i64 tcg_dest, int srcidx,
1079                              int element, TCGMemOp memop)
1080 {
1081     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1082     switch (memop) {
1083     case MO_8:
1084         tcg_gen_ld8u_i64(tcg_dest, cpu_env, vect_off);
1085         break;
1086     case MO_16:
1087         tcg_gen_ld16u_i64(tcg_dest, cpu_env, vect_off);
1088         break;
1089     case MO_32:
1090         tcg_gen_ld32u_i64(tcg_dest, cpu_env, vect_off);
1091         break;
1092     case MO_8|MO_SIGN:
1093         tcg_gen_ld8s_i64(tcg_dest, cpu_env, vect_off);
1094         break;
1095     case MO_16|MO_SIGN:
1096         tcg_gen_ld16s_i64(tcg_dest, cpu_env, vect_off);
1097         break;
1098     case MO_32|MO_SIGN:
1099         tcg_gen_ld32s_i64(tcg_dest, cpu_env, vect_off);
1100         break;
1101     case MO_64:
1102     case MO_64|MO_SIGN:
1103         tcg_gen_ld_i64(tcg_dest, cpu_env, vect_off);
1104         break;
1105     default:
1106         g_assert_not_reached();
1107     }
1108 }
1109
1110 static void read_vec_element_i32(DisasContext *s, TCGv_i32 tcg_dest, int srcidx,
1111                                  int element, TCGMemOp memop)
1112 {
1113     int vect_off = vec_reg_offset(s, srcidx, element, memop & MO_SIZE);
1114     switch (memop) {
1115     case MO_8:
1116         tcg_gen_ld8u_i32(tcg_dest, cpu_env, vect_off);
1117         break;
1118     case MO_16:
1119         tcg_gen_ld16u_i32(tcg_dest, cpu_env, vect_off);
1120         break;
1121     case MO_8|MO_SIGN:
1122         tcg_gen_ld8s_i32(tcg_dest, cpu_env, vect_off);
1123         break;
1124     case MO_16|MO_SIGN:
1125         tcg_gen_ld16s_i32(tcg_dest, cpu_env, vect_off);
1126         break;
1127     case MO_32:
1128     case MO_32|MO_SIGN:
1129         tcg_gen_ld_i32(tcg_dest, cpu_env, vect_off);
1130         break;
1131     default:
1132         g_assert_not_reached();
1133     }
1134 }
1135
1136 /* Set value of an element within a vector register */
1137 static void write_vec_element(DisasContext *s, TCGv_i64 tcg_src, int destidx,
1138                               int element, TCGMemOp memop)
1139 {
1140     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1141     switch (memop) {
1142     case MO_8:
1143         tcg_gen_st8_i64(tcg_src, cpu_env, vect_off);
1144         break;
1145     case MO_16:
1146         tcg_gen_st16_i64(tcg_src, cpu_env, vect_off);
1147         break;
1148     case MO_32:
1149         tcg_gen_st32_i64(tcg_src, cpu_env, vect_off);
1150         break;
1151     case MO_64:
1152         tcg_gen_st_i64(tcg_src, cpu_env, vect_off);
1153         break;
1154     default:
1155         g_assert_not_reached();
1156     }
1157 }
1158
1159 static void write_vec_element_i32(DisasContext *s, TCGv_i32 tcg_src,
1160                                   int destidx, int element, TCGMemOp memop)
1161 {
1162     int vect_off = vec_reg_offset(s, destidx, element, memop & MO_SIZE);
1163     switch (memop) {
1164     case MO_8:
1165         tcg_gen_st8_i32(tcg_src, cpu_env, vect_off);
1166         break;
1167     case MO_16:
1168         tcg_gen_st16_i32(tcg_src, cpu_env, vect_off);
1169         break;
1170     case MO_32:
1171         tcg_gen_st_i32(tcg_src, cpu_env, vect_off);
1172         break;
1173     default:
1174         g_assert_not_reached();
1175     }
1176 }
1177
1178 /* Store from vector register to memory */
1179 static void do_vec_st(DisasContext *s, int srcidx, int element,
1180                       TCGv_i64 tcg_addr, int size)
1181 {
1182     TCGMemOp memop = s->be_data + size;
1183     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1184
1185     read_vec_element(s, tcg_tmp, srcidx, element, size);
1186     tcg_gen_qemu_st_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1187
1188     tcg_temp_free_i64(tcg_tmp);
1189 }
1190
1191 /* Load from memory to vector register */
1192 static void do_vec_ld(DisasContext *s, int destidx, int element,
1193                       TCGv_i64 tcg_addr, int size)
1194 {
1195     TCGMemOp memop = s->be_data + size;
1196     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
1197
1198     tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr, get_mem_index(s), memop);
1199     write_vec_element(s, tcg_tmp, destidx, element, size);
1200
1201     tcg_temp_free_i64(tcg_tmp);
1202 }
1203
1204 /* Check that FP/Neon access is enabled. If it is, return
1205  * true. If not, emit code to generate an appropriate exception,
1206  * and return false; the caller should not emit any code for
1207  * the instruction. Note that this check must happen after all
1208  * unallocated-encoding checks (otherwise the syndrome information
1209  * for the resulting exception will be incorrect).
1210  */
1211 static inline bool fp_access_check(DisasContext *s)
1212 {
1213     assert(!s->fp_access_checked);
1214     s->fp_access_checked = true;
1215
1216     if (!s->fp_excp_el) {
1217         return true;
1218     }
1219
1220     gen_exception_insn(s, 4, EXCP_UDEF, syn_fp_access_trap(1, 0xe, false),
1221                        s->fp_excp_el);
1222     return false;
1223 }
1224
1225 /* Check that SVE access is enabled.  If it is, return true.
1226  * If not, emit code to generate an appropriate exception and return false.
1227  */
1228 static inline bool sve_access_check(DisasContext *s)
1229 {
1230     if (s->sve_excp_el) {
1231         gen_exception_insn(s, 4, EXCP_UDEF, syn_sve_access_trap(),
1232                            s->sve_excp_el);
1233         return false;
1234     }
1235     return true;
1236 }
1237
1238 /*
1239  * This utility function is for doing register extension with an
1240  * optional shift. You will likely want to pass a temporary for the
1241  * destination register. See DecodeRegExtend() in the ARM ARM.
1242  */
1243 static void ext_and_shift_reg(TCGv_i64 tcg_out, TCGv_i64 tcg_in,
1244                               int option, unsigned int shift)
1245 {
1246     int extsize = extract32(option, 0, 2);
1247     bool is_signed = extract32(option, 2, 1);
1248
1249     if (is_signed) {
1250         switch (extsize) {
1251         case 0:
1252             tcg_gen_ext8s_i64(tcg_out, tcg_in);
1253             break;
1254         case 1:
1255             tcg_gen_ext16s_i64(tcg_out, tcg_in);
1256             break;
1257         case 2:
1258             tcg_gen_ext32s_i64(tcg_out, tcg_in);
1259             break;
1260         case 3:
1261             tcg_gen_mov_i64(tcg_out, tcg_in);
1262             break;
1263         }
1264     } else {
1265         switch (extsize) {
1266         case 0:
1267             tcg_gen_ext8u_i64(tcg_out, tcg_in);
1268             break;
1269         case 1:
1270             tcg_gen_ext16u_i64(tcg_out, tcg_in);
1271             break;
1272         case 2:
1273             tcg_gen_ext32u_i64(tcg_out, tcg_in);
1274             break;
1275         case 3:
1276             tcg_gen_mov_i64(tcg_out, tcg_in);
1277             break;
1278         }
1279     }
1280
1281     if (shift) {
1282         tcg_gen_shli_i64(tcg_out, tcg_out, shift);
1283     }
1284 }
1285
/*
 * Hook for the SP alignment check on SP-relative loads and stores.
 * Currently a deliberate no-op: the body is empty and no code is emitted.
 */
static inline void gen_check_sp_alignment(DisasContext *s)
{
    /* The AArch64 architecture mandates that (if enabled via PSTATE
     * or SCTLR bits) there is a check that SP is 16-aligned on every
     * SP-relative load or store (with an exception generated if it is not).
     * In line with general QEMU practice regarding misaligned accesses,
     * we omit these checks for the sake of guest program performance.
     * This function is provided as a hook so we can more easily add these
     * checks in future (possibly as a "favour catching guest program bugs
     * over speed" user selectable option).
     */
}
1298
1299 /*
1300  * This provides a simple table based table lookup decoder. It is
1301  * intended to be used when the relevant bits for decode are too
1302  * awkwardly placed and switch/if based logic would be confusing and
1303  * deeply nested. Since it's a linear search through the table, tables
1304  * should be kept small.
1305  *
1306  * It returns the first handler where insn & mask == pattern, or
1307  * NULL if there is no match.
1308  * The table is terminated by an empty mask (i.e. 0)
1309  */
1310 static inline AArch64DecodeFn *lookup_disas_fn(const AArch64DecodeTable *table,
1311                                                uint32_t insn)
1312 {
1313     const AArch64DecodeTable *tptr = table;
1314
1315     while (tptr->mask) {
1316         if ((insn & tptr->mask) == tptr->pattern) {
1317             return tptr->disas_fn;
1318         }
1319         tptr++;
1320     }
1321     return NULL;
1322 }
1323
1324 /*
1325  * The instruction disassembly implemented here matches
1326  * the instruction encoding classifications in chapter C4
1327  * of the ARM Architecture Reference Manual (DDI0487B_a);
1328  * classification names and decode diagrams here should generally
1329  * match up with those in the manual.
1330  */
1331
1332 /* Unconditional branch (immediate)
1333  *   31  30       26 25                                  0
1334  * +----+-----------+-------------------------------------+
1335  * | op | 0 0 1 0 1 |                 imm26               |
1336  * +----+-----------+-------------------------------------+
1337  */
1338 static void disas_uncond_b_imm(DisasContext *s, uint32_t insn)
1339 {
1340     uint64_t addr = s->pc + sextract32(insn, 0, 26) * 4 - 4;
1341
1342     if (insn & (1U << 31)) {
1343         /* BL Branch with link */
1344         tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1345     }
1346
1347     /* B Branch / BL Branch with link */
1348     gen_goto_tb(s, 0, addr);
1349 }
1350
1351 /* Compare and branch (immediate)
1352  *   31  30         25  24  23                  5 4      0
1353  * +----+-------------+----+---------------------+--------+
1354  * | sf | 0 1 1 0 1 0 | op |         imm19       |   Rt   |
1355  * +----+-------------+----+---------------------+--------+
1356  */
1357 static void disas_comp_b_imm(DisasContext *s, uint32_t insn)
1358 {
1359     unsigned int sf, op, rt;
1360     uint64_t addr;
1361     TCGLabel *label_match;
1362     TCGv_i64 tcg_cmp;
1363
1364     sf = extract32(insn, 31, 1);
1365     op = extract32(insn, 24, 1); /* 0: CBZ; 1: CBNZ */
1366     rt = extract32(insn, 0, 5);
1367     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1368
1369     tcg_cmp = read_cpu_reg(s, rt, sf);
1370     label_match = gen_new_label();
1371
1372     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1373                         tcg_cmp, 0, label_match);
1374
1375     gen_goto_tb(s, 0, s->pc);
1376     gen_set_label(label_match);
1377     gen_goto_tb(s, 1, addr);
1378 }
1379
1380 /* Test and branch (immediate)
1381  *   31  30         25  24  23   19 18          5 4    0
1382  * +----+-------------+----+-------+-------------+------+
1383  * | b5 | 0 1 1 0 1 1 | op |  b40  |    imm14    |  Rt  |
1384  * +----+-------------+----+-------+-------------+------+
1385  */
1386 static void disas_test_b_imm(DisasContext *s, uint32_t insn)
1387 {
1388     unsigned int bit_pos, op, rt;
1389     uint64_t addr;
1390     TCGLabel *label_match;
1391     TCGv_i64 tcg_cmp;
1392
1393     bit_pos = (extract32(insn, 31, 1) << 5) | extract32(insn, 19, 5);
1394     op = extract32(insn, 24, 1); /* 0: TBZ; 1: TBNZ */
1395     addr = s->pc + sextract32(insn, 5, 14) * 4 - 4;
1396     rt = extract32(insn, 0, 5);
1397
1398     tcg_cmp = tcg_temp_new_i64();
1399     tcg_gen_andi_i64(tcg_cmp, cpu_reg(s, rt), (1ULL << bit_pos));
1400     label_match = gen_new_label();
1401     tcg_gen_brcondi_i64(op ? TCG_COND_NE : TCG_COND_EQ,
1402                         tcg_cmp, 0, label_match);
1403     tcg_temp_free_i64(tcg_cmp);
1404     gen_goto_tb(s, 0, s->pc);
1405     gen_set_label(label_match);
1406     gen_goto_tb(s, 1, addr);
1407 }
1408
1409 /* Conditional branch (immediate)
1410  *  31           25  24  23                  5   4  3    0
1411  * +---------------+----+---------------------+----+------+
1412  * | 0 1 0 1 0 1 0 | o1 |         imm19       | o0 | cond |
1413  * +---------------+----+---------------------+----+------+
1414  */
1415 static void disas_cond_b_imm(DisasContext *s, uint32_t insn)
1416 {
1417     unsigned int cond;
1418     uint64_t addr;
1419
1420     if ((insn & (1 << 4)) || (insn & (1 << 24))) {
1421         unallocated_encoding(s);
1422         return;
1423     }
1424     addr = s->pc + sextract32(insn, 5, 19) * 4 - 4;
1425     cond = extract32(insn, 0, 4);
1426
1427     if (cond < 0x0e) {
1428         /* genuinely conditional branches */
1429         TCGLabel *label_match = gen_new_label();
1430         arm_gen_test_cc(cond, label_match);
1431         gen_goto_tb(s, 0, s->pc);
1432         gen_set_label(label_match);
1433         gen_goto_tb(s, 1, addr);
1434     } else {
1435         /* 0xe and 0xf are both "always" conditions */
1436         gen_goto_tb(s, 0, addr);
1437     }
1438 }
1439
1440 /* HINT instruction group, including various allocated HINTs */
1441 static void handle_hint(DisasContext *s, uint32_t insn,
1442                         unsigned int op1, unsigned int op2, unsigned int crm)
1443 {
1444     unsigned int selector = crm << 3 | op2;
1445
1446     if (op1 != 3) {
1447         unallocated_encoding(s);
1448         return;
1449     }
1450
1451     switch (selector) {
1452     case 0: /* NOP */
1453         return;
1454     case 3: /* WFI */
1455         s->base.is_jmp = DISAS_WFI;
1456         return;
1457         /* When running in MTTCG we don't generate jumps to the yield and
1458          * WFE helpers as it won't affect the scheduling of other vCPUs.
1459          * If we wanted to more completely model WFE/SEV so we don't busy
1460          * spin unnecessarily we would need to do something more involved.
1461          */
1462     case 1: /* YIELD */
1463         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1464             s->base.is_jmp = DISAS_YIELD;
1465         }
1466         return;
1467     case 2: /* WFE */
1468         if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
1469             s->base.is_jmp = DISAS_WFE;
1470         }
1471         return;
1472     case 4: /* SEV */
1473     case 5: /* SEVL */
1474         /* we treat all as NOP at least for now */
1475         return;
1476     default:
1477         /* default specified as NOP equivalent */
1478         return;
1479     }
1480 }
1481
/*
 * CLREX: overwrite cpu_exclusive_addr with the constant -1.
 * NOTE(review): -1 is presumably the "no exclusive access outstanding"
 * marker that the store-exclusive code compares against -- confirm there.
 * Neither 's' nor 'insn' is used.
 */
static void gen_clrex(DisasContext *s, uint32_t insn)
{
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
1486
1487 /* CLREX, DSB, DMB, ISB */
1488 static void handle_sync(DisasContext *s, uint32_t insn,
1489                         unsigned int op1, unsigned int op2, unsigned int crm)
1490 {
1491     TCGBar bar;
1492
1493     if (op1 != 3) {
1494         unallocated_encoding(s);
1495         return;
1496     }
1497
1498     switch (op2) {
1499     case 2: /* CLREX */
1500         gen_clrex(s, insn);
1501         return;
1502     case 4: /* DSB */
1503     case 5: /* DMB */
1504         switch (crm & 3) {
1505         case 1: /* MBReqTypes_Reads */
1506             bar = TCG_BAR_SC | TCG_MO_LD_LD | TCG_MO_LD_ST;
1507             break;
1508         case 2: /* MBReqTypes_Writes */
1509             bar = TCG_BAR_SC | TCG_MO_ST_ST;
1510             break;
1511         default: /* MBReqTypes_All */
1512             bar = TCG_BAR_SC | TCG_MO_ALL;
1513             break;
1514         }
1515         tcg_gen_mb(bar);
1516         return;
1517     case 6: /* ISB */
1518         /* We need to break the TB after this insn to execute
1519          * a self-modified code correctly and also to take
1520          * any pending interrupts immediately.
1521          */
1522         gen_goto_tb(s, 0, s->pc);
1523         return;
1524     default:
1525         unallocated_encoding(s);
1526         return;
1527     }
1528 }
1529
1530 /* MSR (immediate) - move immediate to processor state field */
1531 static void handle_msr_i(DisasContext *s, uint32_t insn,
1532                          unsigned int op1, unsigned int op2, unsigned int crm)
1533 {
1534     int op = op1 << 3 | op2;
1535     switch (op) {
1536     case 0x05: /* SPSel */
1537         if (s->current_el == 0) {
1538             unallocated_encoding(s);
1539             return;
1540         }
1541         /* fall through */
1542     case 0x1e: /* DAIFSet */
1543     case 0x1f: /* DAIFClear */
1544     {
1545         TCGv_i32 tcg_imm = tcg_const_i32(crm);
1546         TCGv_i32 tcg_op = tcg_const_i32(op);
1547         gen_a64_set_pc_im(s->pc - 4);
1548         gen_helper_msr_i_pstate(cpu_env, tcg_op, tcg_imm);
1549         tcg_temp_free_i32(tcg_imm);
1550         tcg_temp_free_i32(tcg_op);
1551         /* For DAIFClear, exit the cpu loop to re-evaluate pending IRQs.  */
1552         gen_a64_set_pc_im(s->pc);
1553         s->base.is_jmp = (op == 0x1f ? DISAS_EXIT : DISAS_JUMP);
1554         break;
1555     }
1556     default:
1557         unallocated_encoding(s);
1558         return;
1559     }
1560 }
1561
1562 static void gen_get_nzcv(TCGv_i64 tcg_rt)
1563 {
1564     TCGv_i32 tmp = tcg_temp_new_i32();
1565     TCGv_i32 nzcv = tcg_temp_new_i32();
1566
1567     /* build bit 31, N */
1568     tcg_gen_andi_i32(nzcv, cpu_NF, (1U << 31));
1569     /* build bit 30, Z */
1570     tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_ZF, 0);
1571     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 30, 1);
1572     /* build bit 29, C */
1573     tcg_gen_deposit_i32(nzcv, nzcv, cpu_CF, 29, 1);
1574     /* build bit 28, V */
1575     tcg_gen_shri_i32(tmp, cpu_VF, 31);
1576     tcg_gen_deposit_i32(nzcv, nzcv, tmp, 28, 1);
1577     /* generate result */
1578     tcg_gen_extu_i32_i64(tcg_rt, nzcv);
1579
1580     tcg_temp_free_i32(nzcv);
1581     tcg_temp_free_i32(tmp);
1582 }
1583
1584 static void gen_set_nzcv(TCGv_i64 tcg_rt)
1585
1586 {
1587     TCGv_i32 nzcv = tcg_temp_new_i32();
1588
1589     /* take NZCV from R[t] */
1590     tcg_gen_extrl_i64_i32(nzcv, tcg_rt);
1591
1592     /* bit 31, N */
1593     tcg_gen_andi_i32(cpu_NF, nzcv, (1U << 31));
1594     /* bit 30, Z */
1595     tcg_gen_andi_i32(cpu_ZF, nzcv, (1 << 30));
1596     tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_ZF, cpu_ZF, 0);
1597     /* bit 29, C */
1598     tcg_gen_andi_i32(cpu_CF, nzcv, (1 << 29));
1599     tcg_gen_shri_i32(cpu_CF, cpu_CF, 29);
1600     /* bit 28, V */
1601     tcg_gen_andi_i32(cpu_VF, nzcv, (1 << 28));
1602     tcg_gen_shli_i32(cpu_VF, cpu_VF, 3);
1603     tcg_temp_free_i32(nzcv);
1604 }
1605
1606 /* MRS - move from system register
1607  * MSR (register) - move to system register
1608  * SYS
1609  * SYSL
1610  * These are all essentially the same insn in 'read' and 'write'
1611  * versions, with varying op0 fields.
1612  */
1613 static void handle_sys(DisasContext *s, uint32_t insn, bool isread,
1614                        unsigned int op0, unsigned int op1, unsigned int op2,
1615                        unsigned int crn, unsigned int crm, unsigned int rt)
1616 {
1617     const ARMCPRegInfo *ri;
1618     TCGv_i64 tcg_rt;
1619
1620     ri = get_arm_cp_reginfo(s->cp_regs,
1621                             ENCODE_AA64_CP_REG(CP_REG_ARM64_SYSREG_CP,
1622                                                crn, crm, op0, op1, op2));
1623
1624     if (!ri) {
1625         /* Unknown register; this might be a guest error or a QEMU
1626          * unimplemented feature.
1627          */
1628         qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch64 "
1629                       "system register op0:%d op1:%d crn:%d crm:%d op2:%d\n",
1630                       isread ? "read" : "write", op0, op1, crn, crm, op2);
1631         unallocated_encoding(s);
1632         return;
1633     }
1634
1635     /* Check access permissions */
1636     if (!cp_access_ok(s->current_el, ri, isread)) {
1637         unallocated_encoding(s);
1638         return;
1639     }
1640
1641     if (ri->accessfn) {
1642         /* Emit code to perform further access permissions checks at
1643          * runtime; this may result in an exception.
1644          */
1645         TCGv_ptr tmpptr;
1646         TCGv_i32 tcg_syn, tcg_isread;
1647         uint32_t syndrome;
1648
1649         gen_a64_set_pc_im(s->pc - 4);
1650         tmpptr = tcg_const_ptr(ri);
1651         syndrome = syn_aa64_sysregtrap(op0, op1, op2, crn, crm, rt, isread);
1652         tcg_syn = tcg_const_i32(syndrome);
1653         tcg_isread = tcg_const_i32(isread);
1654         gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn, tcg_isread);
1655         tcg_temp_free_ptr(tmpptr);
1656         tcg_temp_free_i32(tcg_syn);
1657         tcg_temp_free_i32(tcg_isread);
1658     }
1659
1660     /* Handle special cases first */
1661     switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
1662     case ARM_CP_NOP:
1663         return;
1664     case ARM_CP_NZCV:
1665         tcg_rt = cpu_reg(s, rt);
1666         if (isread) {
1667             gen_get_nzcv(tcg_rt);
1668         } else {
1669             gen_set_nzcv(tcg_rt);
1670         }
1671         return;
1672     case ARM_CP_CURRENTEL:
1673         /* Reads as current EL value from pstate, which is
1674          * guaranteed to be constant by the tb flags.
1675          */
1676         tcg_rt = cpu_reg(s, rt);
1677         tcg_gen_movi_i64(tcg_rt, s->current_el << 2);
1678         return;
1679     case ARM_CP_DC_ZVA:
1680         /* Writes clear the aligned block of memory which rt points into. */
1681         tcg_rt = cpu_reg(s, rt);
1682         gen_helper_dc_zva(cpu_env, tcg_rt);
1683         return;
1684     default:
1685         break;
1686     }
1687     if ((ri->type & ARM_CP_SVE) && !sve_access_check(s)) {
1688         return;
1689     }
1690     if ((ri->type & ARM_CP_FPU) && !fp_access_check(s)) {
1691         return;
1692     }
1693
1694     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1695         gen_io_start();
1696     }
1697
1698     tcg_rt = cpu_reg(s, rt);
1699
1700     if (isread) {
1701         if (ri->type & ARM_CP_CONST) {
1702             tcg_gen_movi_i64(tcg_rt, ri->resetvalue);
1703         } else if (ri->readfn) {
1704             TCGv_ptr tmpptr;
1705             tmpptr = tcg_const_ptr(ri);
1706             gen_helper_get_cp_reg64(tcg_rt, cpu_env, tmpptr);
1707             tcg_temp_free_ptr(tmpptr);
1708         } else {
1709             tcg_gen_ld_i64(tcg_rt, cpu_env, ri->fieldoffset);
1710         }
1711     } else {
1712         if (ri->type & ARM_CP_CONST) {
1713             /* If not forbidden by access permissions, treat as WI */
1714             return;
1715         } else if (ri->writefn) {
1716             TCGv_ptr tmpptr;
1717             tmpptr = tcg_const_ptr(ri);
1718             gen_helper_set_cp_reg64(cpu_env, tmpptr, tcg_rt);
1719             tcg_temp_free_ptr(tmpptr);
1720         } else {
1721             tcg_gen_st_i64(tcg_rt, cpu_env, ri->fieldoffset);
1722         }
1723     }
1724
1725     if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
1726         /* I/O operations must end the TB here (whether read or write) */
1727         gen_io_end();
1728         s->base.is_jmp = DISAS_UPDATE;
1729     } else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
1730         /* We default to ending the TB on a coprocessor register write,
1731          * but allow this to be suppressed by the register definition
1732          * (usually only necessary to work around guest bugs).
1733          */
1734         s->base.is_jmp = DISAS_UPDATE;
1735     }
1736 }
1737
1738 /* System
1739  *  31                 22 21  20 19 18 16 15   12 11    8 7   5 4    0
1740  * +---------------------+---+-----+-----+-------+-------+-----+------+
1741  * | 1 1 0 1 0 1 0 1 0 0 | L | op0 | op1 |  CRn  |  CRm  | op2 |  Rt  |
1742  * +---------------------+---+-----+-----+-------+-------+-----+------+
1743  */
1744 static void disas_system(DisasContext *s, uint32_t insn)
1745 {
1746     unsigned int l, op0, op1, crn, crm, op2, rt;
1747     l = extract32(insn, 21, 1);
1748     op0 = extract32(insn, 19, 2);
1749     op1 = extract32(insn, 16, 3);
1750     crn = extract32(insn, 12, 4);
1751     crm = extract32(insn, 8, 4);
1752     op2 = extract32(insn, 5, 3);
1753     rt = extract32(insn, 0, 5);
1754
1755     if (op0 == 0) {
1756         if (l || rt != 31) {
1757             unallocated_encoding(s);
1758             return;
1759         }
1760         switch (crn) {
1761         case 2: /* HINT (including allocated hints like NOP, YIELD, etc) */
1762             handle_hint(s, insn, op1, op2, crm);
1763             break;
1764         case 3: /* CLREX, DSB, DMB, ISB */
1765             handle_sync(s, insn, op1, op2, crm);
1766             break;
1767         case 4: /* MSR (immediate) */
1768             handle_msr_i(s, insn, op1, op2, crm);
1769             break;
1770         default:
1771             unallocated_encoding(s);
1772             break;
1773         }
1774         return;
1775     }
1776     handle_sys(s, insn, l, op0, op1, op2, crn, crm, rt);
1777 }
1778
1779 /* Exception generation
1780  *
1781  *  31             24 23 21 20                     5 4   2 1  0
1782  * +-----------------+-----+------------------------+-----+----+
1783  * | 1 1 0 1 0 1 0 0 | opc |          imm16         | op2 | LL |
1784  * +-----------------------+------------------------+----------+
1785  */
1786 static void disas_exc(DisasContext *s, uint32_t insn)
1787 {
1788     int opc = extract32(insn, 21, 3);
1789     int op2_ll = extract32(insn, 0, 5);
1790     int imm16 = extract32(insn, 5, 16);
1791     TCGv_i32 tmp;
1792
1793     switch (opc) {
1794     case 0:
1795         /* For SVC, HVC and SMC we advance the single-step state
1796          * machine before taking the exception. This is architecturally
1797          * mandated, to ensure that single-stepping a system call
1798          * instruction works properly.
1799          */
1800         switch (op2_ll) {
1801         case 1:                                                     /* SVC */
1802             gen_ss_advance(s);
1803             gen_exception_insn(s, 0, EXCP_SWI, syn_aa64_svc(imm16),
1804                                default_exception_el(s));
1805             break;
1806         case 2:                                                     /* HVC */
1807             if (s->current_el == 0) {
1808                 unallocated_encoding(s);
1809                 break;
1810             }
1811             /* The pre HVC helper handles cases when HVC gets trapped
1812              * as an undefined insn by runtime configuration.
1813              */
1814             gen_a64_set_pc_im(s->pc - 4);
1815             gen_helper_pre_hvc(cpu_env);
1816             gen_ss_advance(s);
1817             gen_exception_insn(s, 0, EXCP_HVC, syn_aa64_hvc(imm16), 2);
1818             break;
1819         case 3:                                                     /* SMC */
1820             if (s->current_el == 0) {
1821                 unallocated_encoding(s);
1822                 break;
1823             }
1824             gen_a64_set_pc_im(s->pc - 4);
1825             tmp = tcg_const_i32(syn_aa64_smc(imm16));
1826             gen_helper_pre_smc(cpu_env, tmp);
1827             tcg_temp_free_i32(tmp);
1828             gen_ss_advance(s);
1829             gen_exception_insn(s, 0, EXCP_SMC, syn_aa64_smc(imm16), 3);
1830             break;
1831         default:
1832             unallocated_encoding(s);
1833             break;
1834         }
1835         break;
1836     case 1:
1837         if (op2_ll != 0) {
1838             unallocated_encoding(s);
1839             break;
1840         }
1841         /* BRK */
1842         gen_exception_insn(s, 4, EXCP_BKPT, syn_aa64_bkpt(imm16),
1843                            default_exception_el(s));
1844         break;
1845     case 2:
1846         if (op2_ll != 0) {
1847             unallocated_encoding(s);
1848             break;
1849         }
1850         /* HLT. This has two purposes.
1851          * Architecturally, it is an external halting debug instruction.
1852          * Since QEMU doesn't implement external debug, we treat this as
1853          * it is required for halting debug disabled: it will UNDEF.
1854          * Secondly, "HLT 0xf000" is the A64 semihosting syscall instruction.
1855          */
1856         if (semihosting_enabled() && imm16 == 0xf000) {
1857 #ifndef CONFIG_USER_ONLY
1858             /* In system mode, don't allow userspace access to semihosting,
1859              * to provide some semblance of security (and for consistency
1860              * with our 32-bit semihosting).
1861              */
1862             if (s->current_el == 0) {
1863                 unsupported_encoding(s, insn);
1864                 break;
1865             }
1866 #endif
1867             gen_exception_internal_insn(s, 0, EXCP_SEMIHOST);
1868         } else {
1869             unsupported_encoding(s, insn);
1870         }
1871         break;
1872     case 5:
1873         if (op2_ll < 1 || op2_ll > 3) {
1874             unallocated_encoding(s);
1875             break;
1876         }
1877         /* DCPS1, DCPS2, DCPS3 */
1878         unsupported_encoding(s, insn);
1879         break;
1880     default:
1881         unallocated_encoding(s);
1882         break;
1883     }
1884 }
1885
1886 /* Unconditional branch (register)
1887  *  31           25 24   21 20   16 15   10 9    5 4     0
1888  * +---------------+-------+-------+-------+------+-------+
1889  * | 1 1 0 1 0 1 1 |  opc  |  op2  |  op3  |  Rn  |  op4  |
1890  * +---------------+-------+-------+-------+------+-------+
1891  */
1892 static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
1893 {
1894     unsigned int opc, op2, op3, rn, op4;
1895
1896     opc = extract32(insn, 21, 4);
1897     op2 = extract32(insn, 16, 5);
1898     op3 = extract32(insn, 10, 6);
1899     rn = extract32(insn, 5, 5);
1900     op4 = extract32(insn, 0, 5);
1901
1902     if (op4 != 0x0 || op3 != 0x0 || op2 != 0x1f) {
1903         unallocated_encoding(s);
1904         return;
1905     }
1906
1907     switch (opc) {
1908     case 0: /* BR */
1909     case 1: /* BLR */
1910     case 2: /* RET */
1911         gen_a64_set_pc(s, cpu_reg(s, rn));
1912         /* BLR also needs to load return address */
1913         if (opc == 1) {
1914             tcg_gen_movi_i64(cpu_reg(s, 30), s->pc);
1915         }
1916         break;
1917     case 4: /* ERET */
1918         if (s->current_el == 0) {
1919             unallocated_encoding(s);
1920             return;
1921         }
1922         gen_helper_exception_return(cpu_env);
1923         /* Must exit loop to check un-masked IRQs */
1924         s->base.is_jmp = DISAS_EXIT;
1925         return;
1926     case 5: /* DRPS */
1927         if (rn != 0x1f) {
1928             unallocated_encoding(s);
1929         } else {
1930             unsupported_encoding(s, insn);
1931         }
1932         return;
1933     default:
1934         unallocated_encoding(s);
1935         return;
1936     }
1937
1938     s->base.is_jmp = DISAS_JUMP;
1939 }
1940
1941 /* Branches, exception generating and system instructions */
1942 static void disas_b_exc_sys(DisasContext *s, uint32_t insn)
1943 {
1944     switch (extract32(insn, 25, 7)) {
1945     case 0x0a: case 0x0b:
1946     case 0x4a: case 0x4b: /* Unconditional branch (immediate) */
1947         disas_uncond_b_imm(s, insn);
1948         break;
1949     case 0x1a: case 0x5a: /* Compare & branch (immediate) */
1950         disas_comp_b_imm(s, insn);
1951         break;
1952     case 0x1b: case 0x5b: /* Test & branch (immediate) */
1953         disas_test_b_imm(s, insn);
1954         break;
1955     case 0x2a: /* Conditional branch (immediate) */
1956         disas_cond_b_imm(s, insn);
1957         break;
1958     case 0x6a: /* Exception generation / System */
1959         if (insn & (1 << 24)) {
1960             disas_system(s, insn);
1961         } else {
1962             disas_exc(s, insn);
1963         }
1964         break;
1965     case 0x6b: /* Unconditional branch (register) */
1966         disas_uncond_b_reg(s, insn);
1967         break;
1968     default:
1969         unallocated_encoding(s);
1970         break;
1971     }
1972 }
1973
1974 /*
1975  * Load/Store exclusive instructions are implemented by remembering
1976  * the value/address loaded, and seeing if these are the same
1977  * when the store is performed. This is not actually the architecturally
1978  * mandated semantics, but it works for typical guest code sequences
1979  * and avoids having to monitor regular stores.
1980  *
1981  * The store exclusive uses the atomic cmpxchg primitives to avoid
1982  * races in multi-threaded linux-user and when MTTCG softmmu is
1983  * enabled.
1984  */
1985 static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
1986                                TCGv_i64 addr, int size, bool is_pair)
1987 {
1988     int idx = get_mem_index(s);
1989     TCGMemOp memop = s->be_data;
1990
1991     g_assert(size <= 3);
1992     if (is_pair) {
1993         g_assert(size >= 2);
1994         if (size == 2) {
1995             /* The pair must be single-copy atomic for the doubleword.  */
1996             memop |= MO_64 | MO_ALIGN;
1997             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
1998             if (s->be_data == MO_LE) {
1999                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 0, 32);
2000                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 32, 32);
2001             } else {
2002                 tcg_gen_extract_i64(cpu_reg(s, rt), cpu_exclusive_val, 32, 32);
2003                 tcg_gen_extract_i64(cpu_reg(s, rt2), cpu_exclusive_val, 0, 32);
2004             }
2005         } else {
2006             /* The pair must be single-copy atomic for *each* doubleword, not
2007                the entire quadword, however it must be quadword aligned.  */
2008             memop |= MO_64;
2009             tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx,
2010                                 memop | MO_ALIGN_16);
2011
2012             TCGv_i64 addr2 = tcg_temp_new_i64();
2013             tcg_gen_addi_i64(addr2, addr, 8);
2014             tcg_gen_qemu_ld_i64(cpu_exclusive_high, addr2, idx, memop);
2015             tcg_temp_free_i64(addr2);
2016
2017             tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2018             tcg_gen_mov_i64(cpu_reg(s, rt2), cpu_exclusive_high);
2019         }
2020     } else {
2021         memop |= size | MO_ALIGN;
2022         tcg_gen_qemu_ld_i64(cpu_exclusive_val, addr, idx, memop);
2023         tcg_gen_mov_i64(cpu_reg(s, rt), cpu_exclusive_val);
2024     }
2025     tcg_gen_mov_i64(cpu_exclusive_addr, addr);
2026 }
2027
/*
 * Emit the store half of an exclusive access: conditionally store Rt
 * (and Rt2 when is_pair) to addr and write a status value into Rd.
 *
 *  rd       - register that receives the status
 *  rt, rt2  - registers holding the data to store (rt2 only for pairs)
 *  addr     - TCG value with the address to store to
 *  size     - size field of the instruction (2 -> 32 bit, 3 -> 64 bit
 *             are the cases distinguished for pairs)
 *  is_pair  - non-zero for the register-pair form
 */
static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                TCGv_i64 addr, int size, int is_pair)
{
    /* if (env->exclusive_addr == addr && env->exclusive_val == [addr]
     *     && (!is_pair || env->exclusive_high == [addr + datasize])) {
     *     [addr] = {Rt};
     *     if (is_pair) {
     *         [addr + datasize] = {Rt2};
     *     }
     *     {Rd} = 0;
     * } else {
     *     {Rd} = 1;
     * }
     * env->exclusive_addr = -1;
     */
    TCGLabel *fail_label = gen_new_label();
    TCGLabel *done_label = gen_new_label();
    TCGv_i64 tmp;

    /* A store to a different address than the one recorded by the
     * exclusive load fails without touching memory.
     */
    tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_exclusive_addr, fail_label);

    tmp = tcg_temp_new_i64();
    if (is_pair) {
        if (size == 2) {
            /* 32-bit pair: merge both registers into one 64-bit value
             * (which register forms the low half depends on the data
             * endianness) and store it with a single 64-bit cmpxchg.
             */
            if (s->be_data == MO_LE) {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt), cpu_reg(s, rt2));
            } else {
                tcg_gen_concat32_i64(tmp, cpu_reg(s, rt2), cpu_reg(s, rt));
            }
            /* tmp is the new value going in and the old memory value
             * coming out.
             */
            tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr,
                                       cpu_exclusive_val, tmp,
                                       get_mem_index(s),
                                       MO_64 | MO_ALIGN | s->be_data);
            /* Status: 0 if memory held the remembered value, else 1 */
            tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
        } else if (s->be_data == MO_LE) {
            /* 64-bit pair: done in a helper; the _parallel variant is
             * selected when the TB was built with CF_PARALLEL.
             * NOTE(review): the helpers presumably leave the 0/1 status
             * in tmp, since it is copied to Rd below - confirm against
             * the helper definitions.
             */
            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
                gen_helper_paired_cmpxchg64_le_parallel(tmp, cpu_env,
                                                        cpu_exclusive_addr,
                                                        cpu_reg(s, rt),
                                                        cpu_reg(s, rt2));
            } else {
                gen_helper_paired_cmpxchg64_le(tmp, cpu_env, cpu_exclusive_addr,
                                               cpu_reg(s, rt), cpu_reg(s, rt2));
            }
        } else {
            /* Big-endian counterpart of the helper path above */
            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
                gen_helper_paired_cmpxchg64_be_parallel(tmp, cpu_env,
                                                        cpu_exclusive_addr,
                                                        cpu_reg(s, rt),
                                                        cpu_reg(s, rt2));
            } else {
                gen_helper_paired_cmpxchg64_be(tmp, cpu_env, cpu_exclusive_addr,
                                               cpu_reg(s, rt), cpu_reg(s, rt2));
            }
        }
    } else {
        /* Single register: cmpxchg Rt against the remembered value */
        tcg_gen_atomic_cmpxchg_i64(tmp, cpu_exclusive_addr, cpu_exclusive_val,
                                   cpu_reg(s, rt), get_mem_index(s),
                                   size | MO_ALIGN | s->be_data);
        /* Status: 0 if memory held the remembered value, else 1 */
        tcg_gen_setcond_i64(TCG_COND_NE, tmp, tmp, cpu_exclusive_val);
    }
    tcg_gen_mov_i64(cpu_reg(s, rd), tmp);
    tcg_temp_free_i64(tmp);
    tcg_gen_br(done_label);

    /* Address mismatch: report failure */
    gen_set_label(fail_label);
    tcg_gen_movi_i64(cpu_reg(s, rd), 1);
    gen_set_label(done_label);
    /* Both paths reset the remembered address to -1 */
    tcg_gen_movi_i64(cpu_exclusive_addr, -1);
}
2098
/* Compute the Sixty-Four bit (SF) register-size flag for the syndrome.
 * This logic is derived from the ARMv8 specs for LDR (shared decode for
 * all encodings).
 *
 * Returns true when the transfer register is 64 bits wide: for signed
 * loads that is when opc<0> is clear, otherwise when size == 3.
 */
static bool disas_ldst_compute_iss_sf(int size, bool is_signed, int opc)
{
    bool opc0_set = extract32(opc, 0, 1) != 0;

    if (!is_signed) {
        return size == 3;
    }
    return !opc0_set;
}
2114
2115 /* Load/store exclusive
2116  *
2117  *  31 30 29         24  23  22   21  20  16  15  14   10 9    5 4    0
2118  * +-----+-------------+----+---+----+------+----+-------+------+------+
2119  * | sz  | 0 0 1 0 0 0 | o2 | L | o1 |  Rs  | o0 |  Rt2  |  Rn  | Rt   |
2120  * +-----+-------------+----+---+----+------+----+-------+------+------+
2121  *
2122  *  sz: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64 bit
2123  *   L: 0 -> store, 1 -> load
2124  *  o2: 0 -> exclusive, 1 -> not
2125  *  o1: 0 -> single register, 1 -> register pair
2126  *  o0: 1 -> load-acquire/store-release, 0 -> not
2127  */
2128 static void disas_ldst_excl(DisasContext *s, uint32_t insn)
2129 {
2130     int rt = extract32(insn, 0, 5);
2131     int rn = extract32(insn, 5, 5);
2132     int rt2 = extract32(insn, 10, 5);
2133     int is_lasr = extract32(insn, 15, 1);
2134     int rs = extract32(insn, 16, 5);
2135     int is_pair = extract32(insn, 21, 1);
2136     int is_store = !extract32(insn, 22, 1);
2137     int is_excl = !extract32(insn, 23, 1);
2138     int size = extract32(insn, 30, 2);
2139     TCGv_i64 tcg_addr;
2140
2141     if ((!is_excl && !is_pair && !is_lasr) ||
2142         (!is_excl && is_pair) ||
2143         (is_pair && size < 2)) {
2144         unallocated_encoding(s);
2145         return;
2146     }
2147
2148     if (rn == 31) {
2149         gen_check_sp_alignment(s);
2150     }
2151     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2152
2153     /* Note that since TCG is single threaded load-acquire/store-release
2154      * semantics require no extra if (is_lasr) { ... } handling.
2155      */
2156
2157     if (is_excl) {
2158         if (!is_store) {
2159             s->is_ldex = true;
2160             gen_load_exclusive(s, rt, rt2, tcg_addr, size, is_pair);
2161             if (is_lasr) {
2162                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2163             }
2164         } else {
2165             if (is_lasr) {
2166                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2167             }
2168             gen_store_exclusive(s, rs, rt, rt2, tcg_addr, size, is_pair);
2169         }
2170     } else {
2171         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2172         bool iss_sf = disas_ldst_compute_iss_sf(size, false, 0);
2173
2174         /* Generate ISS for non-exclusive accesses including LASR.  */
2175         if (is_store) {
2176             if (is_lasr) {
2177                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_STRL);
2178             }
2179             do_gpr_st(s, tcg_rt, tcg_addr, size,
2180                       true, rt, iss_sf, is_lasr);
2181         } else {
2182             do_gpr_ld(s, tcg_rt, tcg_addr, size, false, false,
2183                       true, rt, iss_sf, is_lasr);
2184             if (is_lasr) {
2185                 tcg_gen_mb(TCG_MO_ALL | TCG_BAR_LDAQ);
2186             }
2187         }
2188     }
2189 }
2190
2191 /*
2192  * Load register (literal)
2193  *
2194  *  31 30 29   27  26 25 24 23                5 4     0
2195  * +-----+-------+---+-----+-------------------+-------+
2196  * | opc | 0 1 1 | V | 0 0 |     imm19         |  Rt   |
2197  * +-----+-------+---+-----+-------------------+-------+
2198  *
2199  * V: 1 -> vector (simd/fp)
2200  * opc (non-vector): 00 -> 32 bit, 01 -> 64 bit,
2201  *                   10-> 32 bit signed, 11 -> prefetch
2202  * opc (vector): 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit (11 unallocated)
2203  */
2204 static void disas_ld_lit(DisasContext *s, uint32_t insn)
2205 {
2206     int rt = extract32(insn, 0, 5);
2207     int64_t imm = sextract32(insn, 5, 19) << 2;
2208     bool is_vector = extract32(insn, 26, 1);
2209     int opc = extract32(insn, 30, 2);
2210     bool is_signed = false;
2211     int size = 2;
2212     TCGv_i64 tcg_rt, tcg_addr;
2213
2214     if (is_vector) {
2215         if (opc == 3) {
2216             unallocated_encoding(s);
2217             return;
2218         }
2219         size = 2 + opc;
2220         if (!fp_access_check(s)) {
2221             return;
2222         }
2223     } else {
2224         if (opc == 3) {
2225             /* PRFM (literal) : prefetch */
2226             return;
2227         }
2228         size = 2 + extract32(opc, 0, 1);
2229         is_signed = extract32(opc, 1, 1);
2230     }
2231
2232     tcg_rt = cpu_reg(s, rt);
2233
2234     tcg_addr = tcg_const_i64((s->pc - 4) + imm);
2235     if (is_vector) {
2236         do_fp_ld(s, rt, tcg_addr, size);
2237     } else {
2238         /* Only unsigned 32bit loads target 32bit registers.  */
2239         bool iss_sf = opc != 0;
2240
2241         do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, false,
2242                   true, rt, iss_sf, false);
2243     }
2244     tcg_temp_free_i64(tcg_addr);
2245 }
2246
/*
 * LDNP (Load Pair - non-temporal hint)
 * LDP (Load Pair - non vector)
 * LDPSW (Load Pair Signed Word - non vector)
 * STNP (Store Pair - non-temporal hint)
 * STP (Store Pair - non vector)
 * LDNP (Load Pair of SIMD&FP - non-temporal hint)
 * LDP (Load Pair of SIMD&FP)
 * STNP (Store Pair of SIMD&FP - non-temporal hint)
 * STP (Store Pair of SIMD&FP)
 *
 *  31 30 29   27  26  25 24   23  22 21   15 14   10 9    5 4    0
 * +-----+-------+---+---+-------+---+-------+-------+------+------+
 * | opc | 1 0 1 | V | 0 | index | L |  imm7 |  Rt2  |  Rn  | Rt   |
 * +-----+-------+---+---+-------+---+-------+-------+------+------+
 *
 * opc: LDP/STP/LDNP/STNP        00 -> 32 bit, 10 -> 64 bit
 *      LDPSW                    01
 *      LDP/STP/LDNP/STNP (SIMD) 00 -> 32 bit, 01 -> 64 bit, 10 -> 128 bit
 *   V: 0 -> GPR, 1 -> Vector
 * idx: 00 -> signed offset with non-temporal hint, 01 -> post-index,
 *      10 -> signed offset, 11 -> pre-index
 *   L: 0 -> Store 1 -> Load
 *
 * Rt, Rt2 = GPR or SIMD registers to be stored
 * Rn = general purpose register containing address
 * imm7 = signed offset (multiple of 4 or 8 depending on size)
 *
 * The two elements are accessed at consecutive addresses, Rt first.
 * With writeback, Rn is updated to base + scaled offset afterwards.
 */
static void disas_ldst_pair(DisasContext *s, uint32_t insn)
{
    int rt = extract32(insn, 0, 5);
    int rn = extract32(insn, 5, 5);
    int rt2 = extract32(insn, 10, 5);
    uint64_t offset = sextract64(insn, 15, 7);
    int index = extract32(insn, 23, 2);
    bool is_vector = extract32(insn, 26, 1);
    bool is_load = extract32(insn, 22, 1);
    int opc = extract32(insn, 30, 2);

    bool is_signed = false;
    bool postindex = false;
    bool wback = false;

    TCGv_i64 tcg_addr; /* calculated address */
    int size;

    if (opc == 3) {
        unallocated_encoding(s);
        return;
    }

    if (is_vector) {
        size = 2 + opc;
    } else {
        size = 2 + extract32(opc, 1, 1);
        is_signed = extract32(opc, 0, 1);
        /* opc == 01 is LDPSW only; there is no signed store */
        if (!is_load && is_signed) {
            unallocated_encoding(s);
            return;
        }
    }

    switch (index) {
    case 1: /* post-index */
        postindex = true;
        wback = true;
        break;
    case 0:
        /* signed offset with "non-temporal" hint. Since we don't emulate
         * caches we don't care about hints to the cache system about
         * data access patterns, and handle this identically to plain
         * signed offset.
         */
        if (is_signed) {
            /* There is no non-temporal-hint version of LDPSW */
            unallocated_encoding(s);
            return;
        }
        postindex = false;
        break;
    case 2: /* signed offset, rn not updated */
        postindex = false;
        break;
    case 3: /* pre-index */
        postindex = false;
        wback = true;
        break;
    }

    if (is_vector && !fp_access_check(s)) {
        return;
    }

    /* imm7 is scaled by the element size */
    offset <<= size;

    if (rn == 31) {
        gen_check_sp_alignment(s);
    }

    tcg_addr = read_cpu_reg_sp(s, rn, 1);

    /* Everything except post-index accesses memory at base + offset */
    if (!postindex) {
        tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
    }

    if (is_vector) {
        if (is_load) {
            do_fp_ld(s, rt, tcg_addr, size);
        } else {
            do_fp_st(s, rt, tcg_addr, size);
        }
        /* Advance to the second element */
        tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
        if (is_load) {
            do_fp_ld(s, rt2, tcg_addr, size);
        } else {
            do_fp_st(s, rt2, tcg_addr, size);
        }
    } else {
        TCGv_i64 tcg_rt = cpu_reg(s, rt);
        TCGv_i64 tcg_rt2 = cpu_reg(s, rt2);

        if (is_load) {
            TCGv_i64 tmp = tcg_temp_new_i64();

            /* Do not modify tcg_rt before recognizing any exception
             * from the second load.
             */
            do_gpr_ld(s, tmp, tcg_addr, size, is_signed, false,
                      false, 0, false, false);
            /* Advance to the second element */
            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
            do_gpr_ld(s, tcg_rt2, tcg_addr, size, is_signed, false,
                      false, 0, false, false);

            tcg_gen_mov_i64(tcg_rt, tmp);
            tcg_temp_free_i64(tmp);
        } else {
            do_gpr_st(s, tcg_rt, tcg_addr, size,
                      false, 0, false, false);
            /* Advance to the second element */
            tcg_gen_addi_i64(tcg_addr, tcg_addr, 1 << size);
            do_gpr_st(s, tcg_rt2, tcg_addr, size,
                      false, 0, false, false);
        }
    }

    if (wback) {
        /* tcg_addr currently points at the second element, so step back
         * by one element; for post-index the offset has not been
         * applied yet and is added here. Either way the value written
         * to Rn is base + offset.
         */
        if (postindex) {
            tcg_gen_addi_i64(tcg_addr, tcg_addr, offset - (1 << size));
        } else {
            tcg_gen_subi_i64(tcg_addr, tcg_addr, 1 << size);
        }
        tcg_gen_mov_i64(cpu_reg_sp(s, rn), tcg_addr);
    }
}
2400
2401 /*
2402  * Load/store (immediate post-indexed)
2403  * Load/store (immediate pre-indexed)
2404  * Load/store (unscaled immediate)
2405  *
2406  * 31 30 29   27  26 25 24 23 22 21  20    12 11 10 9    5 4    0
2407  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2408  * |size| 1 1 1 | V | 0 0 | opc | 0 |  imm9  | idx |  Rn  |  Rt  |
2409  * +----+-------+---+-----+-----+---+--------+-----+------+------+
2410  *
2411  * idx = 01 -> post-indexed, 11 pre-indexed, 00 unscaled imm. (no writeback)
2412          10 -> unprivileged
2413  * V = 0 -> non-vector
2414  * size: 00 -> 8 bit, 01 -> 16 bit, 10 -> 32 bit, 11 -> 64bit
2415  * opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2416  */
2417 static void disas_ldst_reg_imm9(DisasContext *s, uint32_t insn,
2418                                 int opc,
2419                                 int size,
2420                                 int rt,
2421                                 bool is_vector)
2422 {
2423     int rn = extract32(insn, 5, 5);
2424     int imm9 = sextract32(insn, 12, 9);
2425     int idx = extract32(insn, 10, 2);
2426     bool is_signed = false;
2427     bool is_store = false;
2428     bool is_extended = false;
2429     bool is_unpriv = (idx == 2);
2430     bool iss_valid = !is_vector;
2431     bool post_index;
2432     bool writeback;
2433
2434     TCGv_i64 tcg_addr;
2435
2436     if (is_vector) {
2437         size |= (opc & 2) << 1;
2438         if (size > 4 || is_unpriv) {
2439             unallocated_encoding(s);
2440             return;
2441         }
2442         is_store = ((opc & 1) == 0);
2443         if (!fp_access_check(s)) {
2444             return;
2445         }
2446     } else {
2447         if (size == 3 && opc == 2) {
2448             /* PRFM - prefetch */
2449             if (is_unpriv) {
2450                 unallocated_encoding(s);
2451                 return;
2452             }
2453             return;
2454         }
2455         if (opc == 3 && size > 1) {
2456             unallocated_encoding(s);
2457             return;
2458         }
2459         is_store = (opc == 0);
2460         is_signed = extract32(opc, 1, 1);
2461         is_extended = (size < 3) && extract32(opc, 0, 1);
2462     }
2463
2464     switch (idx) {
2465     case 0:
2466     case 2:
2467         post_index = false;
2468         writeback = false;
2469         break;
2470     case 1:
2471         post_index = true;
2472         writeback = true;
2473         break;
2474     case 3:
2475         post_index = false;
2476         writeback = true;
2477         break;
2478     default:
2479         g_assert_not_reached();
2480     }
2481
2482     if (rn == 31) {
2483         gen_check_sp_alignment(s);
2484     }
2485     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2486
2487     if (!post_index) {
2488         tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2489     }
2490
2491     if (is_vector) {
2492         if (is_store) {
2493             do_fp_st(s, rt, tcg_addr, size);
2494         } else {
2495             do_fp_ld(s, rt, tcg_addr, size);
2496         }
2497     } else {
2498         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2499         int memidx = is_unpriv ? get_a64_user_mem_index(s) : get_mem_index(s);
2500         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2501
2502         if (is_store) {
2503             do_gpr_st_memidx(s, tcg_rt, tcg_addr, size, memidx,
2504                              iss_valid, rt, iss_sf, false);
2505         } else {
2506             do_gpr_ld_memidx(s, tcg_rt, tcg_addr, size,
2507                              is_signed, is_extended, memidx,
2508                              iss_valid, rt, iss_sf, false);
2509         }
2510     }
2511
2512     if (writeback) {
2513         TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
2514         if (post_index) {
2515             tcg_gen_addi_i64(tcg_addr, tcg_addr, imm9);
2516         }
2517         tcg_gen_mov_i64(tcg_rn, tcg_addr);
2518     }
2519 }
2520
2521 /*
2522  * Load/store (register offset)
2523  *
2524  * 31 30 29   27  26 25 24 23 22 21  20  16 15 13 12 11 10 9  5 4  0
2525  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2526  * |size| 1 1 1 | V | 0 0 | opc | 1 |  Rm  | opt | S| 1 0 | Rn | Rt |
2527  * +----+-------+---+-----+-----+---+------+-----+--+-----+----+----+
2528  *
2529  * For non-vector:
2530  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2531  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2532  * For vector:
2533  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2534  *   opc<0>: 0 -> store, 1 -> load
2535  * V: 1 -> vector/simd
2536  * opt: extend encoding (see DecodeRegExtend)
2537  * S: if S=1 then scale (essentially index by sizeof(size))
2538  * Rt: register to transfer into/out of
2539  * Rn: address register or SP for base
2540  * Rm: offset register or ZR for offset
2541  */
2542 static void disas_ldst_reg_roffset(DisasContext *s, uint32_t insn,
2543                                    int opc,
2544                                    int size,
2545                                    int rt,
2546                                    bool is_vector)
2547 {
2548     int rn = extract32(insn, 5, 5);
2549     int shift = extract32(insn, 12, 1);
2550     int rm = extract32(insn, 16, 5);
2551     int opt = extract32(insn, 13, 3);
2552     bool is_signed = false;
2553     bool is_store = false;
2554     bool is_extended = false;
2555
2556     TCGv_i64 tcg_rm;
2557     TCGv_i64 tcg_addr;
2558
2559     if (extract32(opt, 1, 1) == 0) {
2560         unallocated_encoding(s);
2561         return;
2562     }
2563
2564     if (is_vector) {
2565         size |= (opc & 2) << 1;
2566         if (size > 4) {
2567             unallocated_encoding(s);
2568             return;
2569         }
2570         is_store = !extract32(opc, 0, 1);
2571         if (!fp_access_check(s)) {
2572             return;
2573         }
2574     } else {
2575         if (size == 3 && opc == 2) {
2576             /* PRFM - prefetch */
2577             return;
2578         }
2579         if (opc == 3 && size > 1) {
2580             unallocated_encoding(s);
2581             return;
2582         }
2583         is_store = (opc == 0);
2584         is_signed = extract32(opc, 1, 1);
2585         is_extended = (size < 3) && extract32(opc, 0, 1);
2586     }
2587
2588     if (rn == 31) {
2589         gen_check_sp_alignment(s);
2590     }
2591     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2592
2593     tcg_rm = read_cpu_reg(s, rm, 1);
2594     ext_and_shift_reg(tcg_rm, tcg_rm, opt, shift ? size : 0);
2595
2596     tcg_gen_add_i64(tcg_addr, tcg_addr, tcg_rm);
2597
2598     if (is_vector) {
2599         if (is_store) {
2600             do_fp_st(s, rt, tcg_addr, size);
2601         } else {
2602             do_fp_ld(s, rt, tcg_addr, size);
2603         }
2604     } else {
2605         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2606         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2607         if (is_store) {
2608             do_gpr_st(s, tcg_rt, tcg_addr, size,
2609                       true, rt, iss_sf, false);
2610         } else {
2611             do_gpr_ld(s, tcg_rt, tcg_addr, size,
2612                       is_signed, is_extended,
2613                       true, rt, iss_sf, false);
2614         }
2615     }
2616 }
2617
2618 /*
2619  * Load/store (unsigned immediate)
2620  *
2621  * 31 30 29   27  26 25 24 23 22 21        10 9     5
2622  * +----+-------+---+-----+-----+------------+-------+------+
2623  * |size| 1 1 1 | V | 0 1 | opc |   imm12    |  Rn   |  Rt  |
2624  * +----+-------+---+-----+-----+------------+-------+------+
2625  *
2626  * For non-vector:
2627  *   size: 00-> byte, 01 -> 16 bit, 10 -> 32bit, 11 -> 64bit
2628  *   opc: 00 -> store, 01 -> loadu, 10 -> loads 64, 11 -> loads 32
2629  * For vector:
2630  *   size is opc<1>:size<1:0> so 100 -> 128 bit; 110 and 111 unallocated
2631  *   opc<0>: 0 -> store, 1 -> load
2632  * Rn: base address register (inc SP)
2633  * Rt: target register
2634  */
2635 static void disas_ldst_reg_unsigned_imm(DisasContext *s, uint32_t insn,
2636                                         int opc,
2637                                         int size,
2638                                         int rt,
2639                                         bool is_vector)
2640 {
2641     int rn = extract32(insn, 5, 5);
2642     unsigned int imm12 = extract32(insn, 10, 12);
2643     unsigned int offset;
2644
2645     TCGv_i64 tcg_addr;
2646
2647     bool is_store;
2648     bool is_signed = false;
2649     bool is_extended = false;
2650
2651     if (is_vector) {
2652         size |= (opc & 2) << 1;
2653         if (size > 4) {
2654             unallocated_encoding(s);
2655             return;
2656         }
2657         is_store = !extract32(opc, 0, 1);
2658         if (!fp_access_check(s)) {
2659             return;
2660         }
2661     } else {
2662         if (size == 3 && opc == 2) {
2663             /* PRFM - prefetch */
2664             return;
2665         }
2666         if (opc == 3 && size > 1) {
2667             unallocated_encoding(s);
2668             return;
2669         }
2670         is_store = (opc == 0);
2671         is_signed = extract32(opc, 1, 1);
2672         is_extended = (size < 3) && extract32(opc, 0, 1);
2673     }
2674
2675     if (rn == 31) {
2676         gen_check_sp_alignment(s);
2677     }
2678     tcg_addr = read_cpu_reg_sp(s, rn, 1);
2679     offset = imm12 << size;
2680     tcg_gen_addi_i64(tcg_addr, tcg_addr, offset);
2681
2682     if (is_vector) {
2683         if (is_store) {
2684             do_fp_st(s, rt, tcg_addr, size);
2685         } else {
2686             do_fp_ld(s, rt, tcg_addr, size);
2687         }
2688     } else {
2689         TCGv_i64 tcg_rt = cpu_reg(s, rt);
2690         bool iss_sf = disas_ldst_compute_iss_sf(size, is_signed, opc);
2691         if (is_store) {
2692             do_gpr_st(s, tcg_rt, tcg_addr, size,
2693                       true, rt, iss_sf, false);
2694         } else {
2695             do_gpr_ld(s, tcg_rt, tcg_addr, size, is_signed, is_extended,
2696                       true, rt, iss_sf, false);
2697         }
2698     }
2699 }
2700
2701 /* Load/store register (all forms) */
2702 static void disas_ldst_reg(DisasContext *s, uint32_t insn)
2703 {
2704     int rt = extract32(insn, 0, 5);
2705     int opc = extract32(insn, 22, 2);
2706     bool is_vector = extract32(insn, 26, 1);
2707     int size = extract32(insn, 30, 2);
2708
2709     switch (extract32(insn, 24, 2)) {
2710     case 0:
2711         if (extract32(insn, 21, 1) == 1 && extract32(insn, 10, 2) == 2) {
2712             disas_ldst_reg_roffset(s, insn, opc, size, rt, is_vector);
2713         } else {
2714             /* Load/store register (unscaled immediate)
2715              * Load/store immediate pre/post-indexed
2716              * Load/store register unprivileged
2717              */
2718             disas_ldst_reg_imm9(s, insn, opc, size, rt, is_vector);
2719         }
2720         break;
2721     case 1:
2722         disas_ldst_reg_unsigned_imm(s, insn, opc, size, rt, is_vector);
2723         break;
2724     default:
2725         unallocated_encoding(s);
2726         break;
2727     }
2728 }
2729
2730 /* AdvSIMD load/store multiple structures
2731  *
2732  *  31  30  29           23 22  21         16 15    12 11  10 9    5 4    0
2733  * +---+---+---------------+---+-------------+--------+------+------+------+
2734  * | 0 | Q | 0 0 1 1 0 0 0 | L | 0 0 0 0 0 0 | opcode | size |  Rn  |  Rt  |
2735  * +---+---+---------------+---+-------------+--------+------+------+------+
2736  *
2737  * AdvSIMD load/store multiple structures (post-indexed)
2738  *
2739  *  31  30  29           23 22  21  20     16 15    12 11  10 9    5 4    0
2740  * +---+---+---------------+---+---+---------+--------+------+------+------+
2741  * | 0 | Q | 0 0 1 1 0 0 1 | L | 0 |   Rm    | opcode | size |  Rn  |  Rt  |
2742  * +---+---+---------------+---+---+---------+--------+------+------+------+
2743  *
2744  * Rt: first (or only) SIMD&FP register to be transferred
2745  * Rn: base address or SP
2746  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2747  */
2748 static void disas_ldst_multiple_struct(DisasContext *s, uint32_t insn)
2749 {
2750     int rt = extract32(insn, 0, 5);
2751     int rn = extract32(insn, 5, 5);
2752     int size = extract32(insn, 10, 2);
2753     int opcode = extract32(insn, 12, 4);
2754     bool is_store = !extract32(insn, 22, 1);
2755     bool is_postidx = extract32(insn, 23, 1);
2756     bool is_q = extract32(insn, 30, 1);
2757     TCGv_i64 tcg_addr, tcg_rn;
2758
2759     int ebytes = 1 << size;
2760     int elements = (is_q ? 128 : 64) / (8 << size);
2761     int rpt;    /* num iterations */
2762     int selem;  /* structure elements */
2763     int r;
2764
2765     if (extract32(insn, 31, 1) || extract32(insn, 21, 1)) {
2766         unallocated_encoding(s);
2767         return;
2768     }
2769
2770     /* From the shared decode logic */
2771     switch (opcode) {
2772     case 0x0:
2773         rpt = 1;
2774         selem = 4;
2775         break;
2776     case 0x2:
2777         rpt = 4;
2778         selem = 1;
2779         break;
2780     case 0x4:
2781         rpt = 1;
2782         selem = 3;
2783         break;
2784     case 0x6:
2785         rpt = 3;
2786         selem = 1;
2787         break;
2788     case 0x7:
2789         rpt = 1;
2790         selem = 1;
2791         break;
2792     case 0x8:
2793         rpt = 1;
2794         selem = 2;
2795         break;
2796     case 0xa:
2797         rpt = 2;
2798         selem = 1;
2799         break;
2800     default:
2801         unallocated_encoding(s);
2802         return;
2803     }
2804
2805     if (size == 3 && !is_q && selem != 1) {
2806         /* reserved */
2807         unallocated_encoding(s);
2808         return;
2809     }
2810
2811     if (!fp_access_check(s)) {
2812         return;
2813     }
2814
2815     if (rn == 31) {
2816         gen_check_sp_alignment(s);
2817     }
2818
2819     tcg_rn = cpu_reg_sp(s, rn);
2820     tcg_addr = tcg_temp_new_i64();
2821     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2822
2823     for (r = 0; r < rpt; r++) {
2824         int e;
2825         for (e = 0; e < elements; e++) {
2826             int tt = (rt + r) % 32;
2827             int xs;
2828             for (xs = 0; xs < selem; xs++) {
2829                 if (is_store) {
2830                     do_vec_st(s, tt, e, tcg_addr, size);
2831                 } else {
2832                     do_vec_ld(s, tt, e, tcg_addr, size);
2833
2834                     /* For non-quad operations, setting a slice of the low
2835                      * 64 bits of the register clears the high 64 bits (in
2836                      * the ARM ARM pseudocode this is implicit in the fact
2837                      * that 'rval' is a 64 bit wide variable).
2838                      * For quad operations, we might still need to zero the
2839                      * high bits of SVE.  We optimize by noticing that we only
2840                      * need to do this the first time we touch a register.
2841                      */
2842                     if (e == 0 && (r == 0 || xs == selem - 1)) {
2843                         clear_vec_high(s, is_q, tt);
2844                     }
2845                 }
2846                 tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2847                 tt = (tt + 1) % 32;
2848             }
2849         }
2850     }
2851
2852     if (is_postidx) {
2853         int rm = extract32(insn, 16, 5);
2854         if (rm == 31) {
2855             tcg_gen_mov_i64(tcg_rn, tcg_addr);
2856         } else {
2857             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
2858         }
2859     }
2860     tcg_temp_free_i64(tcg_addr);
2861 }
2862
2863 /* AdvSIMD load/store single structure
2864  *
2865  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2866  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2867  * | 0 | Q | 0 0 1 1 0 1 0 | L R | 0 0 0 0 0 | opc | S | size |  Rn  |  Rt  |
2868  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2869  *
2870  * AdvSIMD load/store single structure (post-indexed)
2871  *
2872  *  31  30  29           23 22 21 20       16 15 13 12  11  10 9    5 4    0
2873  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2874  * | 0 | Q | 0 0 1 1 0 1 1 | L R |     Rm    | opc | S | size |  Rn  |  Rt  |
2875  * +---+---+---------------+-----+-----------+-----+---+------+------+------+
2876  *
2877  * Rt: first (or only) SIMD&FP register to be transferred
2878  * Rn: base address or SP
2879  * Rm (post-index only): post-index register (when !31) or size dependent #imm
2880  * index = encoded in Q:S:size dependent on size
2881  *
2882  * lane_size = encoded in R, opc
2883  * transfer width = encoded in opc, S, size
2884  */
2885 static void disas_ldst_single_struct(DisasContext *s, uint32_t insn)
2886 {
2887     int rt = extract32(insn, 0, 5);
2888     int rn = extract32(insn, 5, 5);
2889     int size = extract32(insn, 10, 2);
2890     int S = extract32(insn, 12, 1);
2891     int opc = extract32(insn, 13, 3);
2892     int R = extract32(insn, 21, 1);
2893     int is_load = extract32(insn, 22, 1);
2894     int is_postidx = extract32(insn, 23, 1);
2895     int is_q = extract32(insn, 30, 1);
2896
2897     int scale = extract32(opc, 1, 2);
2898     int selem = (extract32(opc, 0, 1) << 1 | R) + 1;
2899     bool replicate = false;
2900     int index = is_q << 3 | S << 2 | size;
2901     int ebytes, xs;
2902     TCGv_i64 tcg_addr, tcg_rn;
2903
2904     switch (scale) {
2905     case 3:
2906         if (!is_load || S) {
2907             unallocated_encoding(s);
2908             return;
2909         }
2910         scale = size;
2911         replicate = true;
2912         break;
2913     case 0:
2914         break;
2915     case 1:
2916         if (extract32(size, 0, 1)) {
2917             unallocated_encoding(s);
2918             return;
2919         }
2920         index >>= 1;
2921         break;
2922     case 2:
2923         if (extract32(size, 1, 1)) {
2924             unallocated_encoding(s);
2925             return;
2926         }
2927         if (!extract32(size, 0, 1)) {
2928             index >>= 2;
2929         } else {
2930             if (S) {
2931                 unallocated_encoding(s);
2932                 return;
2933             }
2934             index >>= 3;
2935             scale = 3;
2936         }
2937         break;
2938     default:
2939         g_assert_not_reached();
2940     }
2941
2942     if (!fp_access_check(s)) {
2943         return;
2944     }
2945
2946     ebytes = 1 << scale;
2947
2948     if (rn == 31) {
2949         gen_check_sp_alignment(s);
2950     }
2951
2952     tcg_rn = cpu_reg_sp(s, rn);
2953     tcg_addr = tcg_temp_new_i64();
2954     tcg_gen_mov_i64(tcg_addr, tcg_rn);
2955
2956     for (xs = 0; xs < selem; xs++) {
2957         if (replicate) {
2958             /* Load and replicate to all elements */
2959             uint64_t mulconst;
2960             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
2961
2962             tcg_gen_qemu_ld_i64(tcg_tmp, tcg_addr,
2963                                 get_mem_index(s), s->be_data + scale);
2964             switch (scale) {
2965             case 0:
2966                 mulconst = 0x0101010101010101ULL;
2967                 break;
2968             case 1:
2969                 mulconst = 0x0001000100010001ULL;
2970                 break;
2971             case 2:
2972                 mulconst = 0x0000000100000001ULL;
2973                 break;
2974             case 3:
2975                 mulconst = 0;
2976                 break;
2977             default:
2978                 g_assert_not_reached();
2979             }
2980             if (mulconst) {
2981                 tcg_gen_muli_i64(tcg_tmp, tcg_tmp, mulconst);
2982             }
2983             write_vec_element(s, tcg_tmp, rt, 0, MO_64);
2984             if (is_q) {
2985                 write_vec_element(s, tcg_tmp, rt, 1, MO_64);
2986             }
2987             tcg_temp_free_i64(tcg_tmp);
2988             clear_vec_high(s, is_q, rt);
2989         } else {
2990             /* Load/store one element per register */
2991             if (is_load) {
2992                 do_vec_ld(s, rt, index, tcg_addr, scale);
2993             } else {
2994                 do_vec_st(s, rt, index, tcg_addr, scale);
2995             }
2996         }
2997         tcg_gen_addi_i64(tcg_addr, tcg_addr, ebytes);
2998         rt = (rt + 1) % 32;
2999     }
3000
3001     if (is_postidx) {
3002         int rm = extract32(insn, 16, 5);
3003         if (rm == 31) {
3004             tcg_gen_mov_i64(tcg_rn, tcg_addr);
3005         } else {
3006             tcg_gen_add_i64(tcg_rn, tcg_rn, cpu_reg(s, rm));
3007         }
3008     }
3009     tcg_temp_free_i64(tcg_addr);
3010 }
3011
3012 /* Loads and stores */
3013 static void disas_ldst(DisasContext *s, uint32_t insn)
3014 {
3015     switch (extract32(insn, 24, 6)) {
3016     case 0x08: /* Load/store exclusive */
3017         disas_ldst_excl(s, insn);
3018         break;
3019     case 0x18: case 0x1c: /* Load register (literal) */
3020         disas_ld_lit(s, insn);
3021         break;
3022     case 0x28: case 0x29:
3023     case 0x2c: case 0x2d: /* Load/store pair (all forms) */
3024         disas_ldst_pair(s, insn);
3025         break;
3026     case 0x38: case 0x39:
3027     case 0x3c: case 0x3d: /* Load/store register (all forms) */
3028         disas_ldst_reg(s, insn);
3029         break;
3030     case 0x0c: /* AdvSIMD load/store multiple structures */
3031         disas_ldst_multiple_struct(s, insn);
3032         break;
3033     case 0x0d: /* AdvSIMD load/store single structure */
3034         disas_ldst_single_struct(s, insn);
3035         break;
3036     default:
3037         unallocated_encoding(s);
3038         break;
3039     }
3040 }
3041
3042 /* PC-rel. addressing
3043  *   31  30   29 28       24 23                5 4    0
3044  * +----+-------+-----------+-------------------+------+
3045  * | op | immlo | 1 0 0 0 0 |       immhi       |  Rd  |
3046  * +----+-------+-----------+-------------------+------+
3047  */
3048 static void disas_pc_rel_adr(DisasContext *s, uint32_t insn)
3049 {
3050     unsigned int page, rd;
3051     uint64_t base;
3052     uint64_t offset;
3053
3054     page = extract32(insn, 31, 1);
3055     /* SignExtend(immhi:immlo) -> offset */
3056     offset = sextract64(insn, 5, 19);
3057     offset = offset << 2 | extract32(insn, 29, 2);
3058     rd = extract32(insn, 0, 5);
3059     base = s->pc - 4;
3060
3061     if (page) {
3062         /* ADRP (page based) */
3063         base &= ~0xfff;
3064         offset <<= 12;
3065     }
3066
3067     tcg_gen_movi_i64(cpu_reg(s, rd), base + offset);
3068 }
3069
3070 /*
3071  * Add/subtract (immediate)
3072  *
3073  *  31 30 29 28       24 23 22 21         10 9   5 4   0
3074  * +--+--+--+-----------+-----+-------------+-----+-----+
3075  * |sf|op| S| 1 0 0 0 1 |shift|    imm12    |  Rn | Rd  |
3076  * +--+--+--+-----------+-----+-------------+-----+-----+
3077  *
3078  *    sf: 0 -> 32bit, 1 -> 64bit
3079  *    op: 0 -> add  , 1 -> sub
3080  *     S: 1 -> set flags
3081  * shift: 00 -> LSL imm by 0, 01 -> LSL imm by 12
3082  */
3083 static void disas_add_sub_imm(DisasContext *s, uint32_t insn)
3084 {
3085     int rd = extract32(insn, 0, 5);
3086     int rn = extract32(insn, 5, 5);
3087     uint64_t imm = extract32(insn, 10, 12);
3088     int shift = extract32(insn, 22, 2);
3089     bool setflags = extract32(insn, 29, 1);
3090     bool sub_op = extract32(insn, 30, 1);
3091     bool is_64bit = extract32(insn, 31, 1);
3092
3093     TCGv_i64 tcg_rn = cpu_reg_sp(s, rn);
3094     TCGv_i64 tcg_rd = setflags ? cpu_reg(s, rd) : cpu_reg_sp(s, rd);
3095     TCGv_i64 tcg_result;
3096
3097     switch (shift) {
3098     case 0x0:
3099         break;
3100     case 0x1:
3101         imm <<= 12;
3102         break;
3103     default:
3104         unallocated_encoding(s);
3105         return;
3106     }
3107
3108     tcg_result = tcg_temp_new_i64();
3109     if (!setflags) {
3110         if (sub_op) {
3111             tcg_gen_subi_i64(tcg_result, tcg_rn, imm);
3112         } else {
3113             tcg_gen_addi_i64(tcg_result, tcg_rn, imm);
3114         }
3115     } else {
3116         TCGv_i64 tcg_imm = tcg_const_i64(imm);
3117         if (sub_op) {
3118             gen_sub_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3119         } else {
3120             gen_add_CC(is_64bit, tcg_result, tcg_rn, tcg_imm);
3121         }
3122         tcg_temp_free_i64(tcg_imm);
3123     }
3124
3125     if (is_64bit) {
3126         tcg_gen_mov_i64(tcg_rd, tcg_result);
3127     } else {
3128         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3129     }
3130
3131     tcg_temp_free_i64(tcg_result);
3132 }
3133
/* Replicate an element across a 64 bit integer.
 *
 * 'mask' holds the element value in its bottom e bits (higher bits
 * must be zero).  The element is copied upwards, doubling the filled
 * width each step, until the whole 64 bit value is covered.
 * e must be non-zero.
 */
static uint64_t bitfield_replicate(uint64_t mask, unsigned int e)
{
    unsigned int filled;

    assert(e != 0);
    for (filled = e; filled < 64; filled <<= 1) {
        mask |= mask << filled;
    }
    return mask;
}
3147
/* Build a mask of the low 'length' bits, for 0 < length <= 64.
 * The mask is formed by shifting all-ones right, so length == 64
 * yields all-ones without an out-of-range shift.
 */
static inline uint64_t bitmask64(unsigned int length)
{
    const uint64_t all_ones = ~0ULL;

    assert(length > 0 && length <= 64);
    return all_ones >> (64 - length);
}
3154
/* Simplified variant of pseudocode DecodeBitMasks() for the case where we
 * only require the wmask.
 *
 * Returns false if the imms/immr/immn combination is reserved (the
 * caller should then raise a guest UNDEF exception).  Returns true for
 * a valid combination, in which case the decoded 64 bit pattern is
 * stored in *result; *result is not written on failure.
 */
static bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
                                   unsigned int imms, unsigned int immr)
{
    uint64_t element;
    unsigned int esize, emask, run, rot;
    int esize_log2;

    assert(immn < 2 && imms < 64 && immr < 64);

    /* The bit patterns we create here are 64 bit patterns which
     * are vectors of identical elements of size e = 2, 4, 8, 16, 32 or
     * 64 bits each. Each element contains the same value: a run
     * of between 1 and e-1 non-zero bits, rotated within the
     * element by between 0 and e-1 bits.
     *
     * The element size and run length are encoded into immn (1 bit)
     * and imms (6 bits) as follows:
     * 64 bit elements: immn = 1, imms = <length of run - 1>
     * 32 bit elements: immn = 0, imms = 0 : <length of run - 1>
     * 16 bit elements: immn = 0, imms = 10 : <length of run - 1>
     *  8 bit elements: immn = 0, imms = 110 : <length of run - 1>
     *  4 bit elements: immn = 0, imms = 1110 : <length of run - 1>
     *  2 bit elements: immn = 0, imms = 11110 : <length of run - 1>
     * Notice that immn = 0, imms = 11111x is the only combination
     * not covered by one of the above options; this is reserved.
     * Further, <length of run - 1> all-ones is a reserved pattern.
     *
     * In all cases the rotation is by immr % e (and immr is 6 bits).
     */

    /* The element size is given by the highest set bit of immn:NOT(imms) */
    esize_log2 = 31 - clz32((immn << 6) | (~imms & 0x3f));
    if (esize_log2 < 1) {
        /* This is the immn == 0, imms == 0b11111x case */
        return false;
    }
    esize = 1 << esize_log2;
    emask = esize - 1;

    run = imms & emask;
    rot = immr & emask;
    if (run == emask) {
        /* <length of run - 1> mustn't be all-ones. */
        return false;
    }

    /* One element: run+1 set bits, rotated right by rot within the
     * esize-bit wide element...
     */
    element = bitmask64(run + 1);
    if (rot != 0) {
        element = ((element >> rot) | (element << (esize - rot)))
                  & bitmask64(esize);
    }

    /* ...then replicate the element over the whole 64 bit value */
    *result = bitfield_replicate(element, esize);
    return true;
}
3220
3221 /* Logical (immediate)
3222  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3223  * +----+-----+-------------+---+------+------+------+------+
3224  * | sf | opc | 1 0 0 1 0 0 | N | immr | imms |  Rn  |  Rd  |
3225  * +----+-----+-------------+---+------+------+------+------+
3226  */
3227 static void disas_logic_imm(DisasContext *s, uint32_t insn)
3228 {
3229     unsigned int sf, opc, is_n, immr, imms, rn, rd;
3230     TCGv_i64 tcg_rd, tcg_rn;
3231     uint64_t wmask;
3232     bool is_and = false;
3233
3234     sf = extract32(insn, 31, 1);
3235     opc = extract32(insn, 29, 2);
3236     is_n = extract32(insn, 22, 1);
3237     immr = extract32(insn, 16, 6);
3238     imms = extract32(insn, 10, 6);
3239     rn = extract32(insn, 5, 5);
3240     rd = extract32(insn, 0, 5);
3241
3242     if (!sf && is_n) {
3243         unallocated_encoding(s);
3244         return;
3245     }
3246
3247     if (opc == 0x3) { /* ANDS */
3248         tcg_rd = cpu_reg(s, rd);
3249     } else {
3250         tcg_rd = cpu_reg_sp(s, rd);
3251     }
3252     tcg_rn = cpu_reg(s, rn);
3253
3254     if (!logic_imm_decode_wmask(&wmask, is_n, imms, immr)) {
3255         /* some immediate field values are reserved */
3256         unallocated_encoding(s);
3257         return;
3258     }
3259
3260     if (!sf) {
3261         wmask &= 0xffffffff;
3262     }
3263
3264     switch (opc) {
3265     case 0x3: /* ANDS */
3266     case 0x0: /* AND */
3267         tcg_gen_andi_i64(tcg_rd, tcg_rn, wmask);
3268         is_and = true;
3269         break;
3270     case 0x1: /* ORR */
3271         tcg_gen_ori_i64(tcg_rd, tcg_rn, wmask);
3272         break;
3273     case 0x2: /* EOR */
3274         tcg_gen_xori_i64(tcg_rd, tcg_rn, wmask);
3275         break;
3276     default:
3277         assert(FALSE); /* must handle all above */
3278         break;
3279     }
3280
3281     if (!sf && !is_and) {
3282         /* zero extend final result; we know we can skip this for AND
3283          * since the immediate had the high 32 bits clear.
3284          */
3285         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3286     }
3287
3288     if (opc == 3) { /* ANDS */
3289         gen_logic_CC(sf, tcg_rd);
3290     }
3291 }
3292
3293 /*
3294  * Move wide (immediate)
3295  *
3296  *  31 30 29 28         23 22 21 20             5 4    0
3297  * +--+-----+-------------+-----+----------------+------+
3298  * |sf| opc | 1 0 0 1 0 1 |  hw |  imm16         |  Rd  |
3299  * +--+-----+-------------+-----+----------------+------+
3300  *
3301  * sf: 0 -> 32 bit, 1 -> 64 bit
3302  * opc: 00 -> N, 10 -> Z, 11 -> K
3303  * hw: shift/16 (0,16, and sf only 32, 48)
3304  */
3305 static void disas_movw_imm(DisasContext *s, uint32_t insn)
3306 {
3307     int rd = extract32(insn, 0, 5);
3308     uint64_t imm = extract32(insn, 5, 16);
3309     int sf = extract32(insn, 31, 1);
3310     int opc = extract32(insn, 29, 2);
3311     int pos = extract32(insn, 21, 2) << 4;
3312     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3313     TCGv_i64 tcg_imm;
3314
3315     if (!sf && (pos >= 32)) {
3316         unallocated_encoding(s);
3317         return;
3318     }
3319
3320     switch (opc) {
3321     case 0: /* MOVN */
3322     case 2: /* MOVZ */
3323         imm <<= pos;
3324         if (opc == 0) {
3325             imm = ~imm;
3326         }
3327         if (!sf) {
3328             imm &= 0xffffffffu;
3329         }
3330         tcg_gen_movi_i64(tcg_rd, imm);
3331         break;
3332     case 3: /* MOVK */
3333         tcg_imm = tcg_const_i64(imm);
3334         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_imm, pos, 16);
3335         tcg_temp_free_i64(tcg_imm);
3336         if (!sf) {
3337             tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3338         }
3339         break;
3340     default:
3341         unallocated_encoding(s);
3342         break;
3343     }
3344 }
3345
3346 /* Bitfield
3347  *   31  30 29 28         23 22  21  16 15  10 9    5 4    0
3348  * +----+-----+-------------+---+------+------+------+------+
3349  * | sf | opc | 1 0 0 1 1 0 | N | immr | imms |  Rn  |  Rd  |
3350  * +----+-----+-------------+---+------+------+------+------+
3351  */
3352 static void disas_bitfield(DisasContext *s, uint32_t insn)
3353 {
3354     unsigned int sf, n, opc, ri, si, rn, rd, bitsize, pos, len;
3355     TCGv_i64 tcg_rd, tcg_tmp;
3356
3357     sf = extract32(insn, 31, 1);
3358     opc = extract32(insn, 29, 2);
3359     n = extract32(insn, 22, 1);
3360     ri = extract32(insn, 16, 6);
3361     si = extract32(insn, 10, 6);
3362     rn = extract32(insn, 5, 5);
3363     rd = extract32(insn, 0, 5);
3364     bitsize = sf ? 64 : 32;
3365
3366     if (sf != n || ri >= bitsize || si >= bitsize || opc > 2) {
3367         unallocated_encoding(s);
3368         return;
3369     }
3370
3371     tcg_rd = cpu_reg(s, rd);
3372
3373     /* Suppress the zero-extend for !sf.  Since RI and SI are constrained
3374        to be smaller than bitsize, we'll never reference data outside the
3375        low 32-bits anyway.  */
3376     tcg_tmp = read_cpu_reg(s, rn, 1);
3377
3378     /* Recognize simple(r) extractions.  */
3379     if (si >= ri) {
3380         /* Wd<s-r:0> = Wn<s:r> */
3381         len = (si - ri) + 1;
3382         if (opc == 0) { /* SBFM: ASR, SBFX, SXTB, SXTH, SXTW */
3383             tcg_gen_sextract_i64(tcg_rd, tcg_tmp, ri, len);
3384             goto done;
3385         } else if (opc == 2) { /* UBFM: UBFX, LSR, UXTB, UXTH */
3386             tcg_gen_extract_i64(tcg_rd, tcg_tmp, ri, len);
3387             return;
3388         }
3389         /* opc == 1, BXFIL fall through to deposit */
3390         tcg_gen_extract_i64(tcg_tmp, tcg_tmp, ri, len);
3391         pos = 0;
3392     } else {
3393         /* Handle the ri > si case with a deposit
3394          * Wd<32+s-r,32-r> = Wn<s:0>
3395          */
3396         len = si + 1;
3397         pos = (bitsize - ri) & (bitsize - 1);
3398     }
3399
3400     if (opc == 0 && len < ri) {
3401         /* SBFM: sign extend the destination field from len to fill
3402            the balance of the word.  Let the deposit below insert all
3403            of those sign bits.  */
3404         tcg_gen_sextract_i64(tcg_tmp, tcg_tmp, 0, len);
3405         len = ri;
3406     }
3407
3408     if (opc == 1) { /* BFM, BXFIL */
3409         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_tmp, pos, len);
3410     } else {
3411         /* SBFM or UBFM: We start with zero, and we haven't modified
3412            any bits outside bitsize, therefore the zero-extension
3413            below is unneeded.  */
3414         tcg_gen_deposit_z_i64(tcg_rd, tcg_tmp, pos, len);
3415         return;
3416     }
3417
3418  done:
3419     if (!sf) { /* zero extend final result */
3420         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3421     }
3422 }
3423
3424 /* Extract
3425  *   31  30  29 28         23 22   21  20  16 15    10 9    5 4    0
3426  * +----+------+-------------+---+----+------+--------+------+------+
3427  * | sf | op21 | 1 0 0 1 1 1 | N | o0 |  Rm  |  imms  |  Rn  |  Rd  |
3428  * +----+------+-------------+---+----+------+--------+------+------+
3429  */
3430 static void disas_extract(DisasContext *s, uint32_t insn)
3431 {
3432     unsigned int sf, n, rm, imm, rn, rd, bitsize, op21, op0;
3433
3434     sf = extract32(insn, 31, 1);
3435     n = extract32(insn, 22, 1);
3436     rm = extract32(insn, 16, 5);
3437     imm = extract32(insn, 10, 6);
3438     rn = extract32(insn, 5, 5);
3439     rd = extract32(insn, 0, 5);
3440     op21 = extract32(insn, 29, 2);
3441     op0 = extract32(insn, 21, 1);
3442     bitsize = sf ? 64 : 32;
3443
3444     if (sf != n || op21 || op0 || imm >= bitsize) {
3445         unallocated_encoding(s);
3446     } else {
3447         TCGv_i64 tcg_rd, tcg_rm, tcg_rn;
3448
3449         tcg_rd = cpu_reg(s, rd);
3450
3451         if (unlikely(imm == 0)) {
3452             /* tcg shl_i32/shl_i64 is undefined for 32/64 bit shifts,
3453              * so an extract from bit 0 is a special case.
3454              */
3455             if (sf) {
3456                 tcg_gen_mov_i64(tcg_rd, cpu_reg(s, rm));
3457             } else {
3458                 tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rm));
3459             }
3460         } else if (rm == rn) { /* ROR */
3461             tcg_rm = cpu_reg(s, rm);
3462             if (sf) {
3463                 tcg_gen_rotri_i64(tcg_rd, tcg_rm, imm);
3464             } else {
3465                 TCGv_i32 tmp = tcg_temp_new_i32();
3466                 tcg_gen_extrl_i64_i32(tmp, tcg_rm);
3467                 tcg_gen_rotri_i32(tmp, tmp, imm);
3468                 tcg_gen_extu_i32_i64(tcg_rd, tmp);
3469                 tcg_temp_free_i32(tmp);
3470             }
3471         } else {
3472             tcg_rm = read_cpu_reg(s, rm, sf);
3473             tcg_rn = read_cpu_reg(s, rn, sf);
3474             tcg_gen_shri_i64(tcg_rm, tcg_rm, imm);
3475             tcg_gen_shli_i64(tcg_rn, tcg_rn, bitsize - imm);
3476             tcg_gen_or_i64(tcg_rd, tcg_rm, tcg_rn);
3477             if (!sf) {
3478                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3479             }
3480         }
3481     }
3482 }
3483
3484 /* Data processing - immediate */
3485 static void disas_data_proc_imm(DisasContext *s, uint32_t insn)
3486 {
3487     switch (extract32(insn, 23, 6)) {
3488     case 0x20: case 0x21: /* PC-rel. addressing */
3489         disas_pc_rel_adr(s, insn);
3490         break;
3491     case 0x22: case 0x23: /* Add/subtract (immediate) */
3492         disas_add_sub_imm(s, insn);
3493         break;
3494     case 0x24: /* Logical (immediate) */
3495         disas_logic_imm(s, insn);
3496         break;
3497     case 0x25: /* Move wide (immediate) */
3498         disas_movw_imm(s, insn);
3499         break;
3500     case 0x26: /* Bitfield */
3501         disas_bitfield(s, insn);
3502         break;
3503     case 0x27: /* Extract */
3504         disas_extract(s, insn);
3505         break;
3506     default:
3507         unallocated_encoding(s);
3508         break;
3509     }
3510 }
3511
3512 /* Shift a TCGv src by TCGv shift_amount, put result in dst.
3513  * Note that it is the caller's responsibility to ensure that the
3514  * shift amount is in range (ie 0..31 or 0..63) and provide the ARM
3515  * mandated semantics for out of range shifts.
3516  */
3517 static void shift_reg(TCGv_i64 dst, TCGv_i64 src, int sf,
3518                       enum a64_shift_type shift_type, TCGv_i64 shift_amount)
3519 {
3520     switch (shift_type) {
3521     case A64_SHIFT_TYPE_LSL:
3522         tcg_gen_shl_i64(dst, src, shift_amount);
3523         break;
3524     case A64_SHIFT_TYPE_LSR:
3525         tcg_gen_shr_i64(dst, src, shift_amount);
3526         break;
3527     case A64_SHIFT_TYPE_ASR:
3528         if (!sf) {
3529             tcg_gen_ext32s_i64(dst, src);
3530         }
3531         tcg_gen_sar_i64(dst, sf ? src : dst, shift_amount);
3532         break;
3533     case A64_SHIFT_TYPE_ROR:
3534         if (sf) {
3535             tcg_gen_rotr_i64(dst, src, shift_amount);
3536         } else {
3537             TCGv_i32 t0, t1;
3538             t0 = tcg_temp_new_i32();
3539             t1 = tcg_temp_new_i32();
3540             tcg_gen_extrl_i64_i32(t0, src);
3541             tcg_gen_extrl_i64_i32(t1, shift_amount);
3542             tcg_gen_rotr_i32(t0, t0, t1);
3543             tcg_gen_extu_i32_i64(dst, t0);
3544             tcg_temp_free_i32(t0);
3545             tcg_temp_free_i32(t1);
3546         }
3547         break;
3548     default:
3549         assert(FALSE); /* all shift types should be handled */
3550         break;
3551     }
3552
3553     if (!sf) { /* zero extend final result */
3554         tcg_gen_ext32u_i64(dst, dst);
3555     }
3556 }
3557
3558 /* Shift a TCGv src by immediate, put result in dst.
3559  * The shift amount must be in range (this should always be true as the
3560  * relevant instructions will UNDEF on bad shift immediates).
3561  */
3562 static void shift_reg_imm(TCGv_i64 dst, TCGv_i64 src, int sf,
3563                           enum a64_shift_type shift_type, unsigned int shift_i)
3564 {
3565     assert(shift_i < (sf ? 64 : 32));
3566
3567     if (shift_i == 0) {
3568         tcg_gen_mov_i64(dst, src);
3569     } else {
3570         TCGv_i64 shift_const;
3571
3572         shift_const = tcg_const_i64(shift_i);
3573         shift_reg(dst, src, sf, shift_type, shift_const);
3574         tcg_temp_free_i64(shift_const);
3575     }
3576 }
3577
3578 /* Logical (shifted register)
3579  *   31  30 29 28       24 23   22 21  20  16 15    10 9    5 4    0
3580  * +----+-----+-----------+-------+---+------+--------+------+------+
3581  * | sf | opc | 0 1 0 1 0 | shift | N |  Rm  |  imm6  |  Rn  |  Rd  |
3582  * +----+-----+-----------+-------+---+------+--------+------+------+
3583  */
3584 static void disas_logic_reg(DisasContext *s, uint32_t insn)
3585 {
3586     TCGv_i64 tcg_rd, tcg_rn, tcg_rm;
3587     unsigned int sf, opc, shift_type, invert, rm, shift_amount, rn, rd;
3588
3589     sf = extract32(insn, 31, 1);
3590     opc = extract32(insn, 29, 2);
3591     shift_type = extract32(insn, 22, 2);
3592     invert = extract32(insn, 21, 1);
3593     rm = extract32(insn, 16, 5);
3594     shift_amount = extract32(insn, 10, 6);
3595     rn = extract32(insn, 5, 5);
3596     rd = extract32(insn, 0, 5);
3597
3598     if (!sf && (shift_amount & (1 << 5))) {
3599         unallocated_encoding(s);
3600         return;
3601     }
3602
3603     tcg_rd = cpu_reg(s, rd);
3604
3605     if (opc == 1 && shift_amount == 0 && shift_type == 0 && rn == 31) {
3606         /* Unshifted ORR and ORN with WZR/XZR is the standard encoding for
3607          * register-register MOV and MVN, so it is worth special casing.
3608          */
3609         tcg_rm = cpu_reg(s, rm);
3610         if (invert) {
3611             tcg_gen_not_i64(tcg_rd, tcg_rm);
3612             if (!sf) {
3613                 tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3614             }
3615         } else {
3616             if (sf) {
3617                 tcg_gen_mov_i64(tcg_rd, tcg_rm);
3618             } else {
3619                 tcg_gen_ext32u_i64(tcg_rd, tcg_rm);
3620             }
3621         }
3622         return;
3623     }
3624
3625     tcg_rm = read_cpu_reg(s, rm, sf);
3626
3627     if (shift_amount) {
3628         shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, shift_amount);
3629     }
3630
3631     tcg_rn = cpu_reg(s, rn);
3632
3633     switch (opc | (invert << 2)) {
3634     case 0: /* AND */
3635     case 3: /* ANDS */
3636         tcg_gen_and_i64(tcg_rd, tcg_rn, tcg_rm);
3637         break;
3638     case 1: /* ORR */
3639         tcg_gen_or_i64(tcg_rd, tcg_rn, tcg_rm);
3640         break;
3641     case 2: /* EOR */
3642         tcg_gen_xor_i64(tcg_rd, tcg_rn, tcg_rm);
3643         break;
3644     case 4: /* BIC */
3645     case 7: /* BICS */
3646         tcg_gen_andc_i64(tcg_rd, tcg_rn, tcg_rm);
3647         break;
3648     case 5: /* ORN */
3649         tcg_gen_orc_i64(tcg_rd, tcg_rn, tcg_rm);
3650         break;
3651     case 6: /* EON */
3652         tcg_gen_eqv_i64(tcg_rd, tcg_rn, tcg_rm);
3653         break;
3654     default:
3655         assert(FALSE);
3656         break;
3657     }
3658
3659     if (!sf) {
3660         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
3661     }
3662
3663     if (opc == 3) {
3664         gen_logic_CC(sf, tcg_rd);
3665     }
3666 }
3667
3668 /*
3669  * Add/subtract (extended register)
3670  *
3671  *  31|30|29|28       24|23 22|21|20   16|15  13|12  10|9  5|4  0|
3672  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3673  * |sf|op| S| 0 1 0 1 1 | opt | 1|  Rm   |option| imm3 | Rn | Rd |
3674  * +--+--+--+-----------+-----+--+-------+------+------+----+----+
3675  *
3676  *  sf: 0 -> 32bit, 1 -> 64bit
3677  *  op: 0 -> add  , 1 -> sub
3678  *   S: 1 -> set flags
3679  * opt: 00
3680  * option: extension type (see DecodeRegExtend)
3681  * imm3: optional shift to Rm
3682  *
3683  * Rd = Rn + LSL(extend(Rm), amount)
3684  */
3685 static void disas_add_sub_ext_reg(DisasContext *s, uint32_t insn)
3686 {
3687     int rd = extract32(insn, 0, 5);
3688     int rn = extract32(insn, 5, 5);
3689     int imm3 = extract32(insn, 10, 3);
3690     int option = extract32(insn, 13, 3);
3691     int rm = extract32(insn, 16, 5);
3692     bool setflags = extract32(insn, 29, 1);
3693     bool sub_op = extract32(insn, 30, 1);
3694     bool sf = extract32(insn, 31, 1);
3695
3696     TCGv_i64 tcg_rm, tcg_rn; /* temps */
3697     TCGv_i64 tcg_rd;
3698     TCGv_i64 tcg_result;
3699
3700     if (imm3 > 4) {
3701         unallocated_encoding(s);
3702         return;
3703     }
3704
3705     /* non-flag setting ops may use SP */
3706     if (!setflags) {
3707         tcg_rd = cpu_reg_sp(s, rd);
3708     } else {
3709         tcg_rd = cpu_reg(s, rd);
3710     }
3711     tcg_rn = read_cpu_reg_sp(s, rn, sf);
3712
3713     tcg_rm = read_cpu_reg(s, rm, sf);
3714     ext_and_shift_reg(tcg_rm, tcg_rm, option, imm3);
3715
3716     tcg_result = tcg_temp_new_i64();
3717
3718     if (!setflags) {
3719         if (sub_op) {
3720             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3721         } else {
3722             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3723         }
3724     } else {
3725         if (sub_op) {
3726             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3727         } else {
3728             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3729         }
3730     }
3731
3732     if (sf) {
3733         tcg_gen_mov_i64(tcg_rd, tcg_result);
3734     } else {
3735         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3736     }
3737
3738     tcg_temp_free_i64(tcg_result);
3739 }
3740
3741 /*
3742  * Add/subtract (shifted register)
3743  *
3744  *  31 30 29 28       24 23 22 21 20   16 15     10 9    5 4    0
3745  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3746  * |sf|op| S| 0 1 0 1 1 |shift| 0|  Rm   |  imm6   |  Rn  |  Rd  |
3747  * +--+--+--+-----------+-----+--+-------+---------+------+------+
3748  *
3749  *    sf: 0 -> 32bit, 1 -> 64bit
3750  *    op: 0 -> add  , 1 -> sub
3751  *     S: 1 -> set flags
3752  * shift: 00 -> LSL, 01 -> LSR, 10 -> ASR, 11 -> RESERVED
3753  *  imm6: Shift amount to apply to Rm before the add/sub
3754  */
3755 static void disas_add_sub_reg(DisasContext *s, uint32_t insn)
3756 {
3757     int rd = extract32(insn, 0, 5);
3758     int rn = extract32(insn, 5, 5);
3759     int imm6 = extract32(insn, 10, 6);
3760     int rm = extract32(insn, 16, 5);
3761     int shift_type = extract32(insn, 22, 2);
3762     bool setflags = extract32(insn, 29, 1);
3763     bool sub_op = extract32(insn, 30, 1);
3764     bool sf = extract32(insn, 31, 1);
3765
3766     TCGv_i64 tcg_rd = cpu_reg(s, rd);
3767     TCGv_i64 tcg_rn, tcg_rm;
3768     TCGv_i64 tcg_result;
3769
3770     if ((shift_type == 3) || (!sf && (imm6 > 31))) {
3771         unallocated_encoding(s);
3772         return;
3773     }
3774
3775     tcg_rn = read_cpu_reg(s, rn, sf);
3776     tcg_rm = read_cpu_reg(s, rm, sf);
3777
3778     shift_reg_imm(tcg_rm, tcg_rm, sf, shift_type, imm6);
3779
3780     tcg_result = tcg_temp_new_i64();
3781
3782     if (!setflags) {
3783         if (sub_op) {
3784             tcg_gen_sub_i64(tcg_result, tcg_rn, tcg_rm);
3785         } else {
3786             tcg_gen_add_i64(tcg_result, tcg_rn, tcg_rm);
3787         }
3788     } else {
3789         if (sub_op) {
3790             gen_sub_CC(sf, tcg_result, tcg_rn, tcg_rm);
3791         } else {
3792             gen_add_CC(sf, tcg_result, tcg_rn, tcg_rm);
3793         }
3794     }
3795
3796     if (sf) {
3797         tcg_gen_mov_i64(tcg_rd, tcg_result);
3798     } else {
3799         tcg_gen_ext32u_i64(tcg_rd, tcg_result);
3800     }
3801
3802     tcg_temp_free_i64(tcg_result);
3803 }
3804
3805 /* Data-processing (3 source)
3806  *
3807  *    31 30  29 28       24 23 21  20  16  15  14  10 9    5 4    0
3808  *  +--+------+-----------+------+------+----+------+------+------+
3809  *  |sf| op54 | 1 1 0 1 1 | op31 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
3810  *  +--+------+-----------+------+------+----+------+------+------+
3811  */
3812 static void disas_data_proc_3src(DisasContext *s, uint32_t insn)
3813 {
3814     int rd = extract32(insn, 0, 5);
3815     int rn = extract32(insn, 5, 5);
3816     int ra = extract32(insn, 10, 5);
3817     int rm = extract32(insn, 16, 5);
3818     int op_id = (extract32(insn, 29, 3) << 4) |
3819         (extract32(insn, 21, 3) << 1) |
3820         extract32(insn, 15, 1);
3821     bool sf = extract32(insn, 31, 1);
3822     bool is_sub = extract32(op_id, 0, 1);
3823     bool is_high = extract32(op_id, 2, 1);
3824     bool is_signed = false;
3825     TCGv_i64 tcg_op1;
3826     TCGv_i64 tcg_op2;
3827     TCGv_i64 tcg_tmp;
3828
3829     /* Note that op_id is sf:op54:op31:o0 so it includes the 32/64 size flag */
3830     switch (op_id) {
3831     case 0x42: /* SMADDL */
3832     case 0x43: /* SMSUBL */
3833     case 0x44: /* SMULH */
3834         is_signed = true;
3835         break;
3836     case 0x0: /* MADD (32bit) */
3837     case 0x1: /* MSUB (32bit) */
3838     case 0x40: /* MADD (64bit) */
3839     case 0x41: /* MSUB (64bit) */
3840     case 0x4a: /* UMADDL */
3841     case 0x4b: /* UMSUBL */
3842     case 0x4c: /* UMULH */
3843         break;
3844     default:
3845         unallocated_encoding(s);
3846         return;
3847     }
3848
3849     if (is_high) {
3850         TCGv_i64 low_bits = tcg_temp_new_i64(); /* low bits discarded */
3851         TCGv_i64 tcg_rd = cpu_reg(s, rd);
3852         TCGv_i64 tcg_rn = cpu_reg(s, rn);
3853         TCGv_i64 tcg_rm = cpu_reg(s, rm);
3854
3855         if (is_signed) {
3856             tcg_gen_muls2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3857         } else {
3858             tcg_gen_mulu2_i64(low_bits, tcg_rd, tcg_rn, tcg_rm);
3859         }
3860
3861         tcg_temp_free_i64(low_bits);
3862         return;
3863     }
3864
3865     tcg_op1 = tcg_temp_new_i64();
3866     tcg_op2 = tcg_temp_new_i64();
3867     tcg_tmp = tcg_temp_new_i64();
3868
3869     if (op_id < 0x42) {
3870         tcg_gen_mov_i64(tcg_op1, cpu_reg(s, rn));
3871         tcg_gen_mov_i64(tcg_op2, cpu_reg(s, rm));
3872     } else {
3873         if (is_signed) {
3874             tcg_gen_ext32s_i64(tcg_op1, cpu_reg(s, rn));
3875             tcg_gen_ext32s_i64(tcg_op2, cpu_reg(s, rm));
3876         } else {
3877             tcg_gen_ext32u_i64(tcg_op1, cpu_reg(s, rn));
3878             tcg_gen_ext32u_i64(tcg_op2, cpu_reg(s, rm));
3879         }
3880     }
3881
3882     if (ra == 31 && !is_sub) {
3883         /* Special-case MADD with rA == XZR; it is the standard MUL alias */
3884         tcg_gen_mul_i64(cpu_reg(s, rd), tcg_op1, tcg_op2);
3885     } else {
3886         tcg_gen_mul_i64(tcg_tmp, tcg_op1, tcg_op2);
3887         if (is_sub) {
3888             tcg_gen_sub_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3889         } else {
3890             tcg_gen_add_i64(cpu_reg(s, rd), cpu_reg(s, ra), tcg_tmp);
3891         }
3892     }
3893
3894     if (!sf) {
3895         tcg_gen_ext32u_i64(cpu_reg(s, rd), cpu_reg(s, rd));
3896     }
3897
3898     tcg_temp_free_i64(tcg_op1);
3899     tcg_temp_free_i64(tcg_op2);
3900     tcg_temp_free_i64(tcg_tmp);
3901 }
3902
3903 /* Add/subtract (with carry)
3904  *  31 30 29 28 27 26 25 24 23 22 21  20  16  15   10  9    5 4   0
3905  * +--+--+--+------------------------+------+---------+------+-----+
3906  * |sf|op| S| 1  1  0  1  0  0  0  0 |  rm  | opcode2 |  Rn  |  Rd |
3907  * +--+--+--+------------------------+------+---------+------+-----+
3908  *                                            [000000]
3909  */
3910
3911 static void disas_adc_sbc(DisasContext *s, uint32_t insn)
3912 {
3913     unsigned int sf, op, setflags, rm, rn, rd;
3914     TCGv_i64 tcg_y, tcg_rn, tcg_rd;
3915
3916     if (extract32(insn, 10, 6) != 0) {
3917         unallocated_encoding(s);
3918         return;
3919     }
3920
3921     sf = extract32(insn, 31, 1);
3922     op = extract32(insn, 30, 1);
3923     setflags = extract32(insn, 29, 1);
3924     rm = extract32(insn, 16, 5);
3925     rn = extract32(insn, 5, 5);
3926     rd = extract32(insn, 0, 5);
3927
3928     tcg_rd = cpu_reg(s, rd);
3929     tcg_rn = cpu_reg(s, rn);
3930
3931     if (op) {
3932         tcg_y = new_tmp_a64(s);
3933         tcg_gen_not_i64(tcg_y, cpu_reg(s, rm));
3934     } else {
3935         tcg_y = cpu_reg(s, rm);
3936     }
3937
3938     if (setflags) {
3939         gen_adc_CC(sf, tcg_rd, tcg_rn, tcg_y);
3940     } else {
3941         gen_adc(sf, tcg_rd, tcg_rn, tcg_y);
3942     }
3943 }
3944
3945 /* Conditional compare (immediate / register)
3946  *  31 30 29 28 27 26 25 24 23 22 21  20    16 15  12  11  10  9   5  4 3   0
3947  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3948  * |sf|op| S| 1  1  0  1  0  0  1  0 |imm5/rm | cond |i/r |o2|  Rn  |o3|nzcv |
3949  * +--+--+--+------------------------+--------+------+----+--+------+--+-----+
3950  *        [1]                             y                [0]       [0]
3951  */
3952 static void disas_cc(DisasContext *s, uint32_t insn)
3953 {
3954     unsigned int sf, op, y, cond, rn, nzcv, is_imm;
3955     TCGv_i32 tcg_t0, tcg_t1, tcg_t2;
3956     TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
3957     DisasCompare c;
3958
3959     if (!extract32(insn, 29, 1)) {
3960         unallocated_encoding(s);
3961         return;
3962     }
3963     if (insn & (1 << 10 | 1 << 4)) {
3964         unallocated_encoding(s);
3965         return;
3966     }
3967     sf = extract32(insn, 31, 1);
3968     op = extract32(insn, 30, 1);
3969     is_imm = extract32(insn, 11, 1);
3970     y = extract32(insn, 16, 5); /* y = rm (reg) or imm5 (imm) */
3971     cond = extract32(insn, 12, 4);
3972     rn = extract32(insn, 5, 5);
3973     nzcv = extract32(insn, 0, 4);
3974
3975     /* Set T0 = !COND.  */
3976     tcg_t0 = tcg_temp_new_i32();
3977     arm_test_cc(&c, cond);
3978     tcg_gen_setcondi_i32(tcg_invert_cond(c.cond), tcg_t0, c.value, 0);
3979     arm_free_cc(&c);
3980
3981     /* Load the arguments for the new comparison.  */
3982     if (is_imm) {
3983         tcg_y = new_tmp_a64(s);
3984         tcg_gen_movi_i64(tcg_y, y);
3985     } else {
3986         tcg_y = cpu_reg(s, y);
3987     }
3988     tcg_rn = cpu_reg(s, rn);
3989
3990     /* Set the flags for the new comparison.  */
3991     tcg_tmp = tcg_temp_new_i64();
3992     if (op) {
3993         gen_sub_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3994     } else {
3995         gen_add_CC(sf, tcg_tmp, tcg_rn, tcg_y);
3996     }
3997     tcg_temp_free_i64(tcg_tmp);
3998
3999     /* If COND was false, force the flags to #nzcv.  Compute two masks
4000      * to help with this: T1 = (COND ? 0 : -1), T2 = (COND ? -1 : 0).
4001      * For tcg hosts that support ANDC, we can make do with just T1.
4002      * In either case, allow the tcg optimizer to delete any unused mask.
4003      */
4004     tcg_t1 = tcg_temp_new_i32();
4005     tcg_t2 = tcg_temp_new_i32();
4006     tcg_gen_neg_i32(tcg_t1, tcg_t0);
4007     tcg_gen_subi_i32(tcg_t2, tcg_t0, 1);
4008
4009     if (nzcv & 8) { /* N */
4010         tcg_gen_or_i32(cpu_NF, cpu_NF, tcg_t1);
4011     } else {
4012         if (TCG_TARGET_HAS_andc_i32) {
4013             tcg_gen_andc_i32(cpu_NF, cpu_NF, tcg_t1);
4014         } else {
4015             tcg_gen_and_i32(cpu_NF, cpu_NF, tcg_t2);
4016         }
4017     }
4018     if (nzcv & 4) { /* Z */
4019         if (TCG_TARGET_HAS_andc_i32) {
4020             tcg_gen_andc_i32(cpu_ZF, cpu_ZF, tcg_t1);
4021         } else {
4022             tcg_gen_and_i32(cpu_ZF, cpu_ZF, tcg_t2);
4023         }
4024     } else {
4025         tcg_gen_or_i32(cpu_ZF, cpu_ZF, tcg_t0);
4026     }
4027     if (nzcv & 2) { /* C */
4028         tcg_gen_or_i32(cpu_CF, cpu_CF, tcg_t0);
4029     } else {
4030         if (TCG_TARGET_HAS_andc_i32) {
4031             tcg_gen_andc_i32(cpu_CF, cpu_CF, tcg_t1);
4032         } else {
4033             tcg_gen_and_i32(cpu_CF, cpu_CF, tcg_t2);
4034         }
4035     }
4036     if (nzcv & 1) { /* V */
4037         tcg_gen_or_i32(cpu_VF, cpu_VF, tcg_t1);
4038     } else {
4039         if (TCG_TARGET_HAS_andc_i32) {
4040             tcg_gen_andc_i32(cpu_VF, cpu_VF, tcg_t1);
4041         } else {
4042             tcg_gen_and_i32(cpu_VF, cpu_VF, tcg_t2);
4043         }
4044     }
4045     tcg_temp_free_i32(tcg_t0);
4046     tcg_temp_free_i32(tcg_t1);
4047     tcg_temp_free_i32(tcg_t2);
4048 }
4049
4050 /* Conditional select
4051  *   31   30  29  28             21 20  16 15  12 11 10 9    5 4    0
4052  * +----+----+---+-----------------+------+------+-----+------+------+
4053  * | sf | op | S | 1 1 0 1 0 1 0 0 |  Rm  | cond | op2 |  Rn  |  Rd  |
4054  * +----+----+---+-----------------+------+------+-----+------+------+
4055  */
4056 static void disas_cond_select(DisasContext *s, uint32_t insn)
4057 {
4058     unsigned int sf, else_inv, rm, cond, else_inc, rn, rd;
4059     TCGv_i64 tcg_rd, zero;
4060     DisasCompare64 c;
4061
4062     if (extract32(insn, 29, 1) || extract32(insn, 11, 1)) {
4063         /* S == 1 or op2<1> == 1 */
4064         unallocated_encoding(s);
4065         return;
4066     }
4067     sf = extract32(insn, 31, 1);
4068     else_inv = extract32(insn, 30, 1);
4069     rm = extract32(insn, 16, 5);
4070     cond = extract32(insn, 12, 4);
4071     else_inc = extract32(insn, 10, 1);
4072     rn = extract32(insn, 5, 5);
4073     rd = extract32(insn, 0, 5);
4074
4075     tcg_rd = cpu_reg(s, rd);
4076
4077     a64_test_cc(&c, cond);
4078     zero = tcg_const_i64(0);
4079
4080     if (rn == 31 && rm == 31 && (else_inc ^ else_inv)) {
4081         /* CSET & CSETM.  */
4082         tcg_gen_setcond_i64(tcg_invert_cond(c.cond), tcg_rd, c.value, zero);
4083         if (else_inv) {
4084             tcg_gen_neg_i64(tcg_rd, tcg_rd);
4085         }
4086     } else {
4087         TCGv_i64 t_true = cpu_reg(s, rn);
4088         TCGv_i64 t_false = read_cpu_reg(s, rm, 1);
4089         if (else_inv && else_inc) {
4090             tcg_gen_neg_i64(t_false, t_false);
4091         } else if (else_inv) {
4092             tcg_gen_not_i64(t_false, t_false);
4093         } else if (else_inc) {
4094             tcg_gen_addi_i64(t_false, t_false, 1);
4095         }
4096         tcg_gen_movcond_i64(c.cond, tcg_rd, c.value, zero, t_true, t_false);
4097     }
4098
4099     tcg_temp_free_i64(zero);
4100     a64_free_cc(&c);
4101
4102     if (!sf) {
4103         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4104     }
4105 }
4106
4107 static void handle_clz(DisasContext *s, unsigned int sf,
4108                        unsigned int rn, unsigned int rd)
4109 {
4110     TCGv_i64 tcg_rd, tcg_rn;
4111     tcg_rd = cpu_reg(s, rd);
4112     tcg_rn = cpu_reg(s, rn);
4113
4114     if (sf) {
4115         tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
4116     } else {
4117         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4118         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4119         tcg_gen_clzi_i32(tcg_tmp32, tcg_tmp32, 32);
4120         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4121         tcg_temp_free_i32(tcg_tmp32);
4122     }
4123 }
4124
4125 static void handle_cls(DisasContext *s, unsigned int sf,
4126                        unsigned int rn, unsigned int rd)
4127 {
4128     TCGv_i64 tcg_rd, tcg_rn;
4129     tcg_rd = cpu_reg(s, rd);
4130     tcg_rn = cpu_reg(s, rn);
4131
4132     if (sf) {
4133         tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
4134     } else {
4135         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4136         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4137         tcg_gen_clrsb_i32(tcg_tmp32, tcg_tmp32);
4138         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4139         tcg_temp_free_i32(tcg_tmp32);
4140     }
4141 }
4142
4143 static void handle_rbit(DisasContext *s, unsigned int sf,
4144                         unsigned int rn, unsigned int rd)
4145 {
4146     TCGv_i64 tcg_rd, tcg_rn;
4147     tcg_rd = cpu_reg(s, rd);
4148     tcg_rn = cpu_reg(s, rn);
4149
4150     if (sf) {
4151         gen_helper_rbit64(tcg_rd, tcg_rn);
4152     } else {
4153         TCGv_i32 tcg_tmp32 = tcg_temp_new_i32();
4154         tcg_gen_extrl_i64_i32(tcg_tmp32, tcg_rn);
4155         gen_helper_rbit(tcg_tmp32, tcg_tmp32);
4156         tcg_gen_extu_i32_i64(tcg_rd, tcg_tmp32);
4157         tcg_temp_free_i32(tcg_tmp32);
4158     }
4159 }
4160
4161 /* REV with sf==1, opcode==3 ("REV64") */
4162 static void handle_rev64(DisasContext *s, unsigned int sf,
4163                          unsigned int rn, unsigned int rd)
4164 {
4165     if (!sf) {
4166         unallocated_encoding(s);
4167         return;
4168     }
4169     tcg_gen_bswap64_i64(cpu_reg(s, rd), cpu_reg(s, rn));
4170 }
4171
4172 /* REV with sf==0, opcode==2
4173  * REV32 (sf==1, opcode==2)
4174  */
4175 static void handle_rev32(DisasContext *s, unsigned int sf,
4176                          unsigned int rn, unsigned int rd)
4177 {
4178     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4179
4180     if (sf) {
4181         TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4182         TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4183
4184         /* bswap32_i64 requires zero high word */
4185         tcg_gen_ext32u_i64(tcg_tmp, tcg_rn);
4186         tcg_gen_bswap32_i64(tcg_rd, tcg_tmp);
4187         tcg_gen_shri_i64(tcg_tmp, tcg_rn, 32);
4188         tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
4189         tcg_gen_concat32_i64(tcg_rd, tcg_rd, tcg_tmp);
4190
4191         tcg_temp_free_i64(tcg_tmp);
4192     } else {
4193         tcg_gen_ext32u_i64(tcg_rd, cpu_reg(s, rn));
4194         tcg_gen_bswap32_i64(tcg_rd, tcg_rd);
4195     }
4196 }
4197
4198 /* REV16 (opcode==1) */
4199 static void handle_rev16(DisasContext *s, unsigned int sf,
4200                          unsigned int rn, unsigned int rd)
4201 {
4202     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4203     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
4204     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4205     TCGv_i64 mask = tcg_const_i64(sf ? 0x00ff00ff00ff00ffull : 0x00ff00ff);
4206
4207     tcg_gen_shri_i64(tcg_tmp, tcg_rn, 8);
4208     tcg_gen_and_i64(tcg_rd, tcg_rn, mask);
4209     tcg_gen_and_i64(tcg_tmp, tcg_tmp, mask);
4210     tcg_gen_shli_i64(tcg_rd, tcg_rd, 8);
4211     tcg_gen_or_i64(tcg_rd, tcg_rd, tcg_tmp);
4212
4213     tcg_temp_free_i64(mask);
4214     tcg_temp_free_i64(tcg_tmp);
4215 }
4216
4217 /* Data-processing (1 source)
4218  *   31  30  29  28             21 20     16 15    10 9    5 4    0
4219  * +----+---+---+-----------------+---------+--------+------+------+
4220  * | sf | 1 | S | 1 1 0 1 0 1 1 0 | opcode2 | opcode |  Rn  |  Rd  |
4221  * +----+---+---+-----------------+---------+--------+------+------+
4222  */
4223 static void disas_data_proc_1src(DisasContext *s, uint32_t insn)
4224 {
4225     unsigned int sf, opcode, rn, rd;
4226
4227     if (extract32(insn, 29, 1) || extract32(insn, 16, 5)) {
4228         unallocated_encoding(s);
4229         return;
4230     }
4231
4232     sf = extract32(insn, 31, 1);
4233     opcode = extract32(insn, 10, 6);
4234     rn = extract32(insn, 5, 5);
4235     rd = extract32(insn, 0, 5);
4236
4237     switch (opcode) {
4238     case 0: /* RBIT */
4239         handle_rbit(s, sf, rn, rd);
4240         break;
4241     case 1: /* REV16 */
4242         handle_rev16(s, sf, rn, rd);
4243         break;
4244     case 2: /* REV32 */
4245         handle_rev32(s, sf, rn, rd);
4246         break;
4247     case 3: /* REV64 */
4248         handle_rev64(s, sf, rn, rd);
4249         break;
4250     case 4: /* CLZ */
4251         handle_clz(s, sf, rn, rd);
4252         break;
4253     case 5: /* CLS */
4254         handle_cls(s, sf, rn, rd);
4255         break;
4256     }
4257 }
4258
4259 static void handle_div(DisasContext *s, bool is_signed, unsigned int sf,
4260                        unsigned int rm, unsigned int rn, unsigned int rd)
4261 {
4262     TCGv_i64 tcg_n, tcg_m, tcg_rd;
4263     tcg_rd = cpu_reg(s, rd);
4264
4265     if (!sf && is_signed) {
4266         tcg_n = new_tmp_a64(s);
4267         tcg_m = new_tmp_a64(s);
4268         tcg_gen_ext32s_i64(tcg_n, cpu_reg(s, rn));
4269         tcg_gen_ext32s_i64(tcg_m, cpu_reg(s, rm));
4270     } else {
4271         tcg_n = read_cpu_reg(s, rn, sf);
4272         tcg_m = read_cpu_reg(s, rm, sf);
4273     }
4274
4275     if (is_signed) {
4276         gen_helper_sdiv64(tcg_rd, tcg_n, tcg_m);
4277     } else {
4278         gen_helper_udiv64(tcg_rd, tcg_n, tcg_m);
4279     }
4280
4281     if (!sf) { /* zero extend final result */
4282         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
4283     }
4284 }
4285
4286 /* LSLV, LSRV, ASRV, RORV */
4287 static void handle_shift_reg(DisasContext *s,
4288                              enum a64_shift_type shift_type, unsigned int sf,
4289                              unsigned int rm, unsigned int rn, unsigned int rd)
4290 {
4291     TCGv_i64 tcg_shift = tcg_temp_new_i64();
4292     TCGv_i64 tcg_rd = cpu_reg(s, rd);
4293     TCGv_i64 tcg_rn = read_cpu_reg(s, rn, sf);
4294
4295     tcg_gen_andi_i64(tcg_shift, cpu_reg(s, rm), sf ? 63 : 31);
4296     shift_reg(tcg_rd, tcg_rn, sf, shift_type, tcg_shift);
4297     tcg_temp_free_i64(tcg_shift);
4298 }
4299
4300 /* CRC32[BHWX], CRC32C[BHWX] */
4301 static void handle_crc32(DisasContext *s,
4302                          unsigned int sf, unsigned int sz, bool crc32c,
4303                          unsigned int rm, unsigned int rn, unsigned int rd)
4304 {
4305     TCGv_i64 tcg_acc, tcg_val;
4306     TCGv_i32 tcg_bytes;
4307
4308     if (!arm_dc_feature(s, ARM_FEATURE_CRC)
4309         || (sf == 1 && sz != 3)
4310         || (sf == 0 && sz == 3)) {
4311         unallocated_encoding(s);
4312         return;
4313     }
4314
4315     if (sz == 3) {
4316         tcg_val = cpu_reg(s, rm);
4317     } else {
4318         uint64_t mask;
4319         switch (sz) {
4320         case 0:
4321             mask = 0xFF;
4322             break;
4323         case 1:
4324             mask = 0xFFFF;
4325             break;
4326         case 2:
4327             mask = 0xFFFFFFFF;
4328             break;
4329         default:
4330             g_assert_not_reached();
4331         }
4332         tcg_val = new_tmp_a64(s);
4333         tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
4334     }
4335
4336     tcg_acc = cpu_reg(s, rn);
4337     tcg_bytes = tcg_const_i32(1 << sz);
4338
4339     if (crc32c) {
4340         gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4341     } else {
4342         gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
4343     }
4344
4345     tcg_temp_free_i32(tcg_bytes);
4346 }
4347
4348 /* Data-processing (2 source)
4349  *   31   30  29 28             21 20  16 15    10 9    5 4    0
4350  * +----+---+---+-----------------+------+--------+------+------+
4351  * | sf | 0 | S | 1 1 0 1 0 1 1 0 |  Rm  | opcode |  Rn  |  Rd  |
4352  * +----+---+---+-----------------+------+--------+------+------+
4353  */
4354 static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
4355 {
4356     unsigned int sf, rm, opcode, rn, rd;
4357     sf = extract32(insn, 31, 1);
4358     rm = extract32(insn, 16, 5);
4359     opcode = extract32(insn, 10, 6);
4360     rn = extract32(insn, 5, 5);
4361     rd = extract32(insn, 0, 5);
4362
4363     if (extract32(insn, 29, 1)) {
4364         unallocated_encoding(s);
4365         return;
4366     }
4367
4368     switch (opcode) {
4369     case 2: /* UDIV */
4370         handle_div(s, false, sf, rm, rn, rd);
4371         break;
4372     case 3: /* SDIV */
4373         handle_div(s, true, sf, rm, rn, rd);
4374         break;
4375     case 8: /* LSLV */
4376         handle_shift_reg(s, A64_SHIFT_TYPE_LSL, sf, rm, rn, rd);
4377         break;
4378     case 9: /* LSRV */
4379         handle_shift_reg(s, A64_SHIFT_TYPE_LSR, sf, rm, rn, rd);
4380         break;
4381     case 10: /* ASRV */
4382         handle_shift_reg(s, A64_SHIFT_TYPE_ASR, sf, rm, rn, rd);
4383         break;
4384     case 11: /* RORV */
4385         handle_shift_reg(s, A64_SHIFT_TYPE_ROR, sf, rm, rn, rd);
4386         break;
4387     case 16:
4388     case 17:
4389     case 18:
4390     case 19:
4391     case 20:
4392     case 21:
4393     case 22:
4394     case 23: /* CRC32 */
4395     {
4396         int sz = extract32(opcode, 0, 2);
4397         bool crc32c = extract32(opcode, 2, 1);
4398         handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
4399         break;
4400     }
4401     default:
4402         unallocated_encoding(s);
4403         break;
4404     }
4405 }
4406
4407 /* Data processing - register */
4408 static void disas_data_proc_reg(DisasContext *s, uint32_t insn)
4409 {
4410     switch (extract32(insn, 24, 5)) {
4411     case 0x0a: /* Logical (shifted register) */
4412         disas_logic_reg(s, insn);
4413         break;
4414     case 0x0b: /* Add/subtract */
4415         if (insn & (1 << 21)) { /* (extended register) */
4416             disas_add_sub_ext_reg(s, insn);
4417         } else {
4418             disas_add_sub_reg(s, insn);
4419         }
4420         break;
4421     case 0x1b: /* Data-processing (3 source) */
4422         disas_data_proc_3src(s, insn);
4423         break;
4424     case 0x1a:
4425         switch (extract32(insn, 21, 3)) {
4426         case 0x0: /* Add/subtract (with carry) */
4427             disas_adc_sbc(s, insn);
4428             break;
4429         case 0x2: /* Conditional compare */
4430             disas_cc(s, insn); /* both imm and reg forms */
4431             break;
4432         case 0x4: /* Conditional select */
4433             disas_cond_select(s, insn);
4434             break;
4435         case 0x6: /* Data-processing */
4436             if (insn & (1 << 30)) { /* (1 source) */
4437                 disas_data_proc_1src(s, insn);
4438             } else {            /* (2 source) */
4439                 disas_data_proc_2src(s, insn);
4440             }
4441             break;
4442         default:
4443             unallocated_encoding(s);
4444             break;
4445         }
4446         break;
4447     default:
4448         unallocated_encoding(s);
4449         break;
4450     }
4451 }
4452
4453 static void handle_fp_compare(DisasContext *s, bool is_double,
4454                               unsigned int rn, unsigned int rm,
4455                               bool cmp_with_zero, bool signal_all_nans)
4456 {
4457     TCGv_i64 tcg_flags = tcg_temp_new_i64();
4458     TCGv_ptr fpst = get_fpstatus_ptr(false);
4459
4460     if (is_double) {
4461         TCGv_i64 tcg_vn, tcg_vm;
4462
4463         tcg_vn = read_fp_dreg(s, rn);
4464         if (cmp_with_zero) {
4465             tcg_vm = tcg_const_i64(0);
4466         } else {
4467             tcg_vm = read_fp_dreg(s, rm);
4468         }
4469         if (signal_all_nans) {
4470             gen_helper_vfp_cmped_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4471         } else {
4472             gen_helper_vfp_cmpd_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4473         }
4474         tcg_temp_free_i64(tcg_vn);
4475         tcg_temp_free_i64(tcg_vm);
4476     } else {
4477         TCGv_i32 tcg_vn, tcg_vm;
4478
4479         tcg_vn = read_fp_sreg(s, rn);
4480         if (cmp_with_zero) {
4481             tcg_vm = tcg_const_i32(0);
4482         } else {
4483             tcg_vm = read_fp_sreg(s, rm);
4484         }
4485         if (signal_all_nans) {
4486             gen_helper_vfp_cmpes_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4487         } else {
4488             gen_helper_vfp_cmps_a64(tcg_flags, tcg_vn, tcg_vm, fpst);
4489         }
4490         tcg_temp_free_i32(tcg_vn);
4491         tcg_temp_free_i32(tcg_vm);
4492     }
4493
4494     tcg_temp_free_ptr(fpst);
4495
4496     gen_set_nzcv(tcg_flags);
4497
4498     tcg_temp_free_i64(tcg_flags);
4499 }
4500
4501 /* Floating point compare
4502  *   31  30  29 28       24 23  22  21 20  16 15 14 13  10    9    5 4     0
4503  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4504  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | op  | 1 0 0 0 |  Rn  |  op2  |
4505  * +---+---+---+-----------+------+---+------+-----+---------+------+-------+
4506  */
4507 static void disas_fp_compare(DisasContext *s, uint32_t insn)
4508 {
4509     unsigned int mos, type, rm, op, rn, opc, op2r;
4510
4511     mos = extract32(insn, 29, 3);
4512     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4513     rm = extract32(insn, 16, 5);
4514     op = extract32(insn, 14, 2);
4515     rn = extract32(insn, 5, 5);
4516     opc = extract32(insn, 3, 2);
4517     op2r = extract32(insn, 0, 3);
4518
4519     if (mos || op || op2r || type > 1) {
4520         unallocated_encoding(s);
4521         return;
4522     }
4523
4524     if (!fp_access_check(s)) {
4525         return;
4526     }
4527
4528     handle_fp_compare(s, type, rn, rm, opc & 1, opc & 2);
4529 }
4530
4531 /* Floating point conditional compare
4532  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5  4   3    0
4533  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4534  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 0 1 |  Rn  | op | nzcv |
4535  * +---+---+---+-----------+------+---+------+------+-----+------+----+------+
4536  */
4537 static void disas_fp_ccomp(DisasContext *s, uint32_t insn)
4538 {
4539     unsigned int mos, type, rm, cond, rn, op, nzcv;
4540     TCGv_i64 tcg_flags;
4541     TCGLabel *label_continue = NULL;
4542
4543     mos = extract32(insn, 29, 3);
4544     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4545     rm = extract32(insn, 16, 5);
4546     cond = extract32(insn, 12, 4);
4547     rn = extract32(insn, 5, 5);
4548     op = extract32(insn, 4, 1);
4549     nzcv = extract32(insn, 0, 4);
4550
4551     if (mos || type > 1) {
4552         unallocated_encoding(s);
4553         return;
4554     }
4555
4556     if (!fp_access_check(s)) {
4557         return;
4558     }
4559
4560     if (cond < 0x0e) { /* not always */
4561         TCGLabel *label_match = gen_new_label();
4562         label_continue = gen_new_label();
4563         arm_gen_test_cc(cond, label_match);
4564         /* nomatch: */
4565         tcg_flags = tcg_const_i64(nzcv << 28);
4566         gen_set_nzcv(tcg_flags);
4567         tcg_temp_free_i64(tcg_flags);
4568         tcg_gen_br(label_continue);
4569         gen_set_label(label_match);
4570     }
4571
4572     handle_fp_compare(s, type, rn, rm, false, op);
4573
4574     if (cond < 0x0e) {
4575         gen_set_label(label_continue);
4576     }
4577 }
4578
4579 /* Floating point conditional select
4580  *   31  30  29 28       24 23  22  21 20  16 15  12 11 10 9    5 4    0
4581  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4582  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | cond | 1 1 |  Rn  |  Rd  |
4583  * +---+---+---+-----------+------+---+------+------+-----+------+------+
4584  */
4585 static void disas_fp_csel(DisasContext *s, uint32_t insn)
4586 {
4587     unsigned int mos, type, rm, cond, rn, rd;
4588     TCGv_i64 t_true, t_false, t_zero;
4589     DisasCompare64 c;
4590
4591     mos = extract32(insn, 29, 3);
4592     type = extract32(insn, 22, 2); /* 0 = single, 1 = double */
4593     rm = extract32(insn, 16, 5);
4594     cond = extract32(insn, 12, 4);
4595     rn = extract32(insn, 5, 5);
4596     rd = extract32(insn, 0, 5);
4597
4598     if (mos || type > 1) {
4599         unallocated_encoding(s);
4600         return;
4601     }
4602
4603     if (!fp_access_check(s)) {
4604         return;
4605     }
4606
4607     /* Zero extend sreg inputs to 64 bits now.  */
4608     t_true = tcg_temp_new_i64();
4609     t_false = tcg_temp_new_i64();
4610     read_vec_element(s, t_true, rn, 0, type ? MO_64 : MO_32);
4611     read_vec_element(s, t_false, rm, 0, type ? MO_64 : MO_32);
4612
4613     a64_test_cc(&c, cond);
4614     t_zero = tcg_const_i64(0);
4615     tcg_gen_movcond_i64(c.cond, t_true, c.value, t_zero, t_true, t_false);
4616     tcg_temp_free_i64(t_zero);
4617     tcg_temp_free_i64(t_false);
4618     a64_free_cc(&c);
4619
4620     /* Note that sregs write back zeros to the high bits,
4621        and we've already done the zero-extension.  */
4622     write_fp_dreg(s, rd, t_true);
4623     tcg_temp_free_i64(t_true);
4624 }
4625
4626 /* Floating-point data-processing (1 source) - half precision */
4627 static void handle_fp_1src_half(DisasContext *s, int opcode, int rd, int rn)
4628 {
4629     TCGv_ptr fpst = NULL;
4630     TCGv_i32 tcg_op = tcg_temp_new_i32();
4631     TCGv_i32 tcg_res = tcg_temp_new_i32();
4632
4633     read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
4634
4635     switch (opcode) {
4636     case 0x0: /* FMOV */
4637         tcg_gen_mov_i32(tcg_res, tcg_op);
4638         break;
4639     case 0x1: /* FABS */
4640         tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
4641         break;
4642     case 0x2: /* FNEG */
4643         tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
4644         break;
4645     case 0x3: /* FSQRT */
4646         gen_helper_sqrt_f16(tcg_res, tcg_op, cpu_env);
4647         break;
4648     case 0x8: /* FRINTN */
4649     case 0x9: /* FRINTP */
4650     case 0xa: /* FRINTM */
4651     case 0xb: /* FRINTZ */
4652     case 0xc: /* FRINTA */
4653     {
4654         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4655         fpst = get_fpstatus_ptr(true);
4656
4657         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4658         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
4659
4660         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4661         tcg_temp_free_i32(tcg_rmode);
4662         break;
4663     }
4664     case 0xe: /* FRINTX */
4665         fpst = get_fpstatus_ptr(true);
4666         gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, fpst);
4667         break;
4668     case 0xf: /* FRINTI */
4669         fpst = get_fpstatus_ptr(true);
4670         gen_helper_advsimd_rinth(tcg_res, tcg_op, fpst);
4671         break;
4672     default:
4673         abort();
4674     }
4675
4676     write_fp_sreg(s, rd, tcg_res);
4677
4678     if (fpst) {
4679         tcg_temp_free_ptr(fpst);
4680     }
4681     tcg_temp_free_i32(tcg_op);
4682     tcg_temp_free_i32(tcg_res);
4683 }
4684
4685 /* Floating-point data-processing (1 source) - single precision */
4686 static void handle_fp_1src_single(DisasContext *s, int opcode, int rd, int rn)
4687 {
4688     TCGv_ptr fpst;
4689     TCGv_i32 tcg_op;
4690     TCGv_i32 tcg_res;
4691
4692     fpst = get_fpstatus_ptr(false);
4693     tcg_op = read_fp_sreg(s, rn);
4694     tcg_res = tcg_temp_new_i32();
4695
4696     switch (opcode) {
4697     case 0x0: /* FMOV */
4698         tcg_gen_mov_i32(tcg_res, tcg_op);
4699         break;
4700     case 0x1: /* FABS */
4701         gen_helper_vfp_abss(tcg_res, tcg_op);
4702         break;
4703     case 0x2: /* FNEG */
4704         gen_helper_vfp_negs(tcg_res, tcg_op);
4705         break;
4706     case 0x3: /* FSQRT */
4707         gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
4708         break;
4709     case 0x8: /* FRINTN */
4710     case 0x9: /* FRINTP */
4711     case 0xa: /* FRINTM */
4712     case 0xb: /* FRINTZ */
4713     case 0xc: /* FRINTA */
4714     {
4715         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4716
4717         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4718         gen_helper_rints(tcg_res, tcg_op, fpst);
4719
4720         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4721         tcg_temp_free_i32(tcg_rmode);
4722         break;
4723     }
4724     case 0xe: /* FRINTX */
4725         gen_helper_rints_exact(tcg_res, tcg_op, fpst);
4726         break;
4727     case 0xf: /* FRINTI */
4728         gen_helper_rints(tcg_res, tcg_op, fpst);
4729         break;
4730     default:
4731         abort();
4732     }
4733
4734     write_fp_sreg(s, rd, tcg_res);
4735
4736     tcg_temp_free_ptr(fpst);
4737     tcg_temp_free_i32(tcg_op);
4738     tcg_temp_free_i32(tcg_res);
4739 }
4740
4741 /* Floating-point data-processing (1 source) - double precision */
4742 static void handle_fp_1src_double(DisasContext *s, int opcode, int rd, int rn)
4743 {
4744     TCGv_ptr fpst;
4745     TCGv_i64 tcg_op;
4746     TCGv_i64 tcg_res;
4747
4748     switch (opcode) {
4749     case 0x0: /* FMOV */
4750         gen_gvec_fn2(s, false, rd, rn, tcg_gen_gvec_mov, 0);
4751         return;
4752     }
4753
4754     fpst = get_fpstatus_ptr(false);
4755     tcg_op = read_fp_dreg(s, rn);
4756     tcg_res = tcg_temp_new_i64();
4757
4758     switch (opcode) {
4759     case 0x1: /* FABS */
4760         gen_helper_vfp_absd(tcg_res, tcg_op);
4761         break;
4762     case 0x2: /* FNEG */
4763         gen_helper_vfp_negd(tcg_res, tcg_op);
4764         break;
4765     case 0x3: /* FSQRT */
4766         gen_helper_vfp_sqrtd(tcg_res, tcg_op, cpu_env);
4767         break;
4768     case 0x8: /* FRINTN */
4769     case 0x9: /* FRINTP */
4770     case 0xa: /* FRINTM */
4771     case 0xb: /* FRINTZ */
4772     case 0xc: /* FRINTA */
4773     {
4774         TCGv_i32 tcg_rmode = tcg_const_i32(arm_rmode_to_sf(opcode & 7));
4775
4776         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4777         gen_helper_rintd(tcg_res, tcg_op, fpst);
4778
4779         gen_helper_set_rmode(tcg_rmode, tcg_rmode, fpst);
4780         tcg_temp_free_i32(tcg_rmode);
4781         break;
4782     }
4783     case 0xe: /* FRINTX */
4784         gen_helper_rintd_exact(tcg_res, tcg_op, fpst);
4785         break;
4786     case 0xf: /* FRINTI */
4787         gen_helper_rintd(tcg_res, tcg_op, fpst);
4788         break;
4789     default:
4790         abort();
4791     }
4792
4793     write_fp_dreg(s, rd, tcg_res);
4794
4795     tcg_temp_free_ptr(fpst);
4796     tcg_temp_free_i64(tcg_op);
4797     tcg_temp_free_i64(tcg_res);
4798 }
4799
4800 static void handle_fp_fcvt(DisasContext *s, int opcode,
4801                            int rd, int rn, int dtype, int ntype)
4802 {
4803     switch (ntype) {
4804     case 0x0:
4805     {
4806         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4807         if (dtype == 1) {
4808             /* Single to double */
4809             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4810             gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, cpu_env);
4811             write_fp_dreg(s, rd, tcg_rd);
4812             tcg_temp_free_i64(tcg_rd);
4813         } else {
4814             /* Single to half */
4815             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4816             gen_helper_vfp_fcvt_f32_to_f16(tcg_rd, tcg_rn, cpu_env);
4817             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4818             write_fp_sreg(s, rd, tcg_rd);
4819             tcg_temp_free_i32(tcg_rd);
4820         }
4821         tcg_temp_free_i32(tcg_rn);
4822         break;
4823     }
4824     case 0x1:
4825     {
4826         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
4827         TCGv_i32 tcg_rd = tcg_temp_new_i32();
4828         if (dtype == 0) {
4829             /* Double to single */
4830             gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, cpu_env);
4831         } else {
4832             /* Double to half */
4833             gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, cpu_env);
4834             /* write_fp_sreg is OK here because top half of tcg_rd is zero */
4835         }
4836         write_fp_sreg(s, rd, tcg_rd);
4837         tcg_temp_free_i32(tcg_rd);
4838         tcg_temp_free_i64(tcg_rn);
4839         break;
4840     }
4841     case 0x3:
4842     {
4843         TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
4844         tcg_gen_ext16u_i32(tcg_rn, tcg_rn);
4845         if (dtype == 0) {
4846             /* Half to single */
4847             TCGv_i32 tcg_rd = tcg_temp_new_i32();
4848             gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, cpu_env);
4849             write_fp_sreg(s, rd, tcg_rd);
4850             tcg_temp_free_i32(tcg_rd);
4851         } else {
4852             /* Half to double */
4853             TCGv_i64 tcg_rd = tcg_temp_new_i64();
4854             gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, cpu_env);
4855             write_fp_dreg(s, rd, tcg_rd);
4856             tcg_temp_free_i64(tcg_rd);
4857         }
4858         tcg_temp_free_i32(tcg_rn);
4859         break;
4860     }
4861     default:
4862         abort();
4863     }
4864 }
4865
4866 /* Floating point data-processing (1 source)
4867  *   31  30  29 28       24 23  22  21 20    15 14       10 9    5 4    0
4868  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4869  * | M | 0 | S | 1 1 1 1 0 | type | 1 | opcode | 1 0 0 0 0 |  Rn  |  Rd  |
4870  * +---+---+---+-----------+------+---+--------+-----------+------+------+
4871  */
4872 static void disas_fp_1src(DisasContext *s, uint32_t insn)
4873 {
4874     int type = extract32(insn, 22, 2);
4875     int opcode = extract32(insn, 15, 6);
4876     int rn = extract32(insn, 5, 5);
4877     int rd = extract32(insn, 0, 5);
4878
4879     switch (opcode) {
4880     case 0x4: case 0x5: case 0x7:
4881     {
4882         /* FCVT between half, single and double precision */
4883         int dtype = extract32(opcode, 0, 2);
4884         if (type == 2 || dtype == type) {
4885             unallocated_encoding(s);
4886             return;
4887         }
4888         if (!fp_access_check(s)) {
4889             return;
4890         }
4891
4892         handle_fp_fcvt(s, opcode, rd, rn, dtype, type);
4893         break;
4894     }
4895     case 0x0 ... 0x3:
4896     case 0x8 ... 0xc:
4897     case 0xe ... 0xf:
4898         /* 32-to-32 and 64-to-64 ops */
4899         switch (type) {
4900         case 0:
4901             if (!fp_access_check(s)) {
4902                 return;
4903             }
4904
4905             handle_fp_1src_single(s, opcode, rd, rn);
4906             break;
4907         case 1:
4908             if (!fp_access_check(s)) {
4909                 return;
4910             }
4911
4912             handle_fp_1src_double(s, opcode, rd, rn);
4913             break;
4914         case 3:
4915             if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
4916                 unallocated_encoding(s);
4917                 return;
4918             }
4919
4920             if (!fp_access_check(s)) {
4921                 return;
4922             }
4923
4924             handle_fp_1src_half(s, opcode, rd, rn);
4925             break;
4926         default:
4927             unallocated_encoding(s);
4928         }
4929         break;
4930     default:
4931         unallocated_encoding(s);
4932         break;
4933     }
4934 }
4935
4936 /* Floating-point data-processing (2 source) - single precision */
4937 static void handle_fp_2src_single(DisasContext *s, int opcode,
4938                                   int rd, int rn, int rm)
4939 {
4940     TCGv_i32 tcg_op1;
4941     TCGv_i32 tcg_op2;
4942     TCGv_i32 tcg_res;
4943     TCGv_ptr fpst;
4944
4945     tcg_res = tcg_temp_new_i32();
4946     fpst = get_fpstatus_ptr(false);
4947     tcg_op1 = read_fp_sreg(s, rn);
4948     tcg_op2 = read_fp_sreg(s, rm);
4949
4950     switch (opcode) {
4951     case 0x0: /* FMUL */
4952         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4953         break;
4954     case 0x1: /* FDIV */
4955         gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
4956         break;
4957     case 0x2: /* FADD */
4958         gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
4959         break;
4960     case 0x3: /* FSUB */
4961         gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
4962         break;
4963     case 0x4: /* FMAX */
4964         gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
4965         break;
4966     case 0x5: /* FMIN */
4967         gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
4968         break;
4969     case 0x6: /* FMAXNM */
4970         gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
4971         break;
4972     case 0x7: /* FMINNM */
4973         gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
4974         break;
4975     case 0x8: /* FNMUL */
4976         gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
4977         gen_helper_vfp_negs(tcg_res, tcg_res);
4978         break;
4979     }
4980
4981     write_fp_sreg(s, rd, tcg_res);
4982
4983     tcg_temp_free_ptr(fpst);
4984     tcg_temp_free_i32(tcg_op1);
4985     tcg_temp_free_i32(tcg_op2);
4986     tcg_temp_free_i32(tcg_res);
4987 }
4988
4989 /* Floating-point data-processing (2 source) - double precision */
4990 static void handle_fp_2src_double(DisasContext *s, int opcode,
4991                                   int rd, int rn, int rm)
4992 {
4993     TCGv_i64 tcg_op1;
4994     TCGv_i64 tcg_op2;
4995     TCGv_i64 tcg_res;
4996     TCGv_ptr fpst;
4997
4998     tcg_res = tcg_temp_new_i64();
4999     fpst = get_fpstatus_ptr(false);
5000     tcg_op1 = read_fp_dreg(s, rn);
5001     tcg_op2 = read_fp_dreg(s, rm);
5002
5003     switch (opcode) {
5004     case 0x0: /* FMUL */
5005         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5006         break;
5007     case 0x1: /* FDIV */
5008         gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
5009         break;
5010     case 0x2: /* FADD */
5011         gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
5012         break;
5013     case 0x3: /* FSUB */
5014         gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
5015         break;
5016     case 0x4: /* FMAX */
5017         gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
5018         break;
5019     case 0x5: /* FMIN */
5020         gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
5021         break;
5022     case 0x6: /* FMAXNM */
5023         gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5024         break;
5025     case 0x7: /* FMINNM */
5026         gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
5027         break;
5028     case 0x8: /* FNMUL */
5029         gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
5030         gen_helper_vfp_negd(tcg_res, tcg_res);
5031         break;
5032     }
5033
5034     write_fp_dreg(s, rd, tcg_res);
5035
5036     tcg_temp_free_ptr(fpst);
5037     tcg_temp_free_i64(tcg_op1);
5038     tcg_temp_free_i64(tcg_op2);
5039     tcg_temp_free_i64(tcg_res);
5040 }
5041
5042 /* Floating point data-processing (2 source)
5043  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
5044  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
5045  * | M | 0 | S | 1 1 1 1 0 | type | 1 |  Rm  | opcode | 1 0 |  Rn  |  Rd  |
5046  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
5047  */
5048 static void disas_fp_2src(DisasContext *s, uint32_t insn)
5049 {
5050     int type = extract32(insn, 22, 2);
5051     int rd = extract32(insn, 0, 5);
5052     int rn = extract32(insn, 5, 5);
5053     int rm = extract32(insn, 16, 5);
5054     int opcode = extract32(insn, 12, 4);
5055
5056     if (opcode > 8) {
5057         unallocated_encoding(s);
5058         return;
5059     }
5060
5061     switch (type) {
5062     case 0:
5063         if (!fp_access_check(s)) {
5064             return;
5065         }
5066         handle_fp_2src_single(s, opcode, rd, rn, rm);
5067         break;
5068     case 1:
5069         if (!fp_access_check(s)) {
5070             return;
5071         }
5072         handle_fp_2src_double(s, opcode, rd, rn, rm);
5073         break;
5074     default:
5075         unallocated_encoding(s);
5076     }
5077 }
5078
5079 /* Floating-point data-processing (3 source) - single precision */
5080 static void handle_fp_3src_single(DisasContext *s, bool o0, bool o1,
5081                                   int rd, int rn, int rm, int ra)
5082 {
5083     TCGv_i32 tcg_op1, tcg_op2, tcg_op3;
5084     TCGv_i32 tcg_res = tcg_temp_new_i32();
5085     TCGv_ptr fpst = get_fpstatus_ptr(false);
5086
5087     tcg_op1 = read_fp_sreg(s, rn);
5088     tcg_op2 = read_fp_sreg(s, rm);
5089     tcg_op3 = read_fp_sreg(s, ra);
5090
5091     /* These are fused multiply-add, and must be done as one
5092      * floating point operation with no rounding between the
5093      * multiplication and addition steps.
5094      * NB that doing the negations here as separate steps is
5095      * correct : an input NaN should come out with its sign bit
5096      * flipped if it is a negated-input.
5097      */
5098     if (o1 == true) {
5099         gen_helper_vfp_negs(tcg_op3, tcg_op3);
5100     }
5101
5102     if (o0 != o1) {
5103         gen_helper_vfp_negs(tcg_op1, tcg_op1);
5104     }
5105
5106     gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5107
5108     write_fp_sreg(s, rd, tcg_res);
5109
5110     tcg_temp_free_ptr(fpst);
5111     tcg_temp_free_i32(tcg_op1);
5112     tcg_temp_free_i32(tcg_op2);
5113     tcg_temp_free_i32(tcg_op3);
5114     tcg_temp_free_i32(tcg_res);
5115 }
5116
5117 /* Floating-point data-processing (3 source) - double precision */
5118 static void handle_fp_3src_double(DisasContext *s, bool o0, bool o1,
5119                                   int rd, int rn, int rm, int ra)
5120 {
5121     TCGv_i64 tcg_op1, tcg_op2, tcg_op3;
5122     TCGv_i64 tcg_res = tcg_temp_new_i64();
5123     TCGv_ptr fpst = get_fpstatus_ptr(false);
5124
5125     tcg_op1 = read_fp_dreg(s, rn);
5126     tcg_op2 = read_fp_dreg(s, rm);
5127     tcg_op3 = read_fp_dreg(s, ra);
5128
5129     /* These are fused multiply-add, and must be done as one
5130      * floating point operation with no rounding between the
5131      * multiplication and addition steps.
5132      * NB that doing the negations here as separate steps is
5133      * correct : an input NaN should come out with its sign bit
5134      * flipped if it is a negated-input.
5135      */
5136     if (o1 == true) {
5137         gen_helper_vfp_negd(tcg_op3, tcg_op3);
5138     }
5139
5140     if (o0 != o1) {
5141         gen_helper_vfp_negd(tcg_op1, tcg_op1);
5142     }
5143
5144     gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2, tcg_op3, fpst);
5145
5146     write_fp_dreg(s, rd, tcg_res);
5147
5148     tcg_temp_free_ptr(fpst);
5149     tcg_temp_free_i64(tcg_op1);
5150     tcg_temp_free_i64(tcg_op2);
5151     tcg_temp_free_i64(tcg_op3);
5152     tcg_temp_free_i64(tcg_res);
5153 }
5154
5155 /* Floating point data-processing (3 source)
5156  *   31  30  29 28       24 23  22  21  20  16  15  14  10 9    5 4    0
5157  * +---+---+---+-----------+------+----+------+----+------+------+------+
5158  * | M | 0 | S | 1 1 1 1 1 | type | o1 |  Rm  | o0 |  Ra  |  Rn  |  Rd  |
5159  * +---+---+---+-----------+------+----+------+----+------+------+------+
5160  */
5161 static void disas_fp_3src(DisasContext *s, uint32_t insn)
5162 {
5163     int type = extract32(insn, 22, 2);
5164     int rd = extract32(insn, 0, 5);
5165     int rn = extract32(insn, 5, 5);
5166     int ra = extract32(insn, 10, 5);
5167     int rm = extract32(insn, 16, 5);
5168     bool o0 = extract32(insn, 15, 1);
5169     bool o1 = extract32(insn, 21, 1);
5170
5171     switch (type) {
5172     case 0:
5173         if (!fp_access_check(s)) {
5174             return;
5175         }
5176         handle_fp_3src_single(s, o0, o1, rd, rn, rm, ra);
5177         break;
5178     case 1:
5179         if (!fp_access_check(s)) {
5180             return;
5181         }
5182         handle_fp_3src_double(s, o0, o1, rd, rn, rm, ra);
5183         break;
5184     default:
5185         unallocated_encoding(s);
5186     }
5187 }
5188
5189 /* The imm8 encodes the sign bit, enough bits to represent an exponent in
5190  * the range 01....1xx to 10....0xx, and the most significant 4 bits of
5191  * the mantissa; see VFPExpandImm() in the v8 ARM ARM.
5192  */
5193 static uint64_t vfp_expand_imm(int size, uint8_t imm8)
5194 {
5195     uint64_t imm;
5196
5197     switch (size) {
5198     case MO_64:
5199         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5200             (extract32(imm8, 6, 1) ? 0x3fc0 : 0x4000) |
5201             extract32(imm8, 0, 6);
5202         imm <<= 48;
5203         break;
5204     case MO_32:
5205         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5206             (extract32(imm8, 6, 1) ? 0x3e00 : 0x4000) |
5207             (extract32(imm8, 0, 6) << 3);
5208         imm <<= 16;
5209         break;
5210     case MO_16:
5211         imm = (extract32(imm8, 7, 1) ? 0x8000 : 0) |
5212             (extract32(imm8, 6, 1) ? 0x3000 : 0x4000) |
5213             (extract32(imm8, 0, 6) << 6);
5214         break;
5215     default:
5216         g_assert_not_reached();
5217     }
5218     return imm;
5219 }
5220
5221 /* Floating point immediate
5222  *   31  30  29 28       24 23  22  21 20        13 12   10 9    5 4    0
5223  * +---+---+---+-----------+------+---+------------+-------+------+------+
5224  * | M | 0 | S | 1 1 1 1 0 | type | 1 |    imm8    | 1 0 0 | imm5 |  Rd  |
5225  * +---+---+---+-----------+------+---+------------+-------+------+------+
5226  */
5227 static void disas_fp_imm(DisasContext *s, uint32_t insn)
5228 {
5229     int rd = extract32(insn, 0, 5);
5230     int imm8 = extract32(insn, 13, 8);
5231     int is_double = extract32(insn, 22, 2);
5232     uint64_t imm;
5233     TCGv_i64 tcg_res;
5234
5235     if (is_double > 1) {
5236         unallocated_encoding(s);
5237         return;
5238     }
5239
5240     if (!fp_access_check(s)) {
5241         return;
5242     }
5243
5244     imm = vfp_expand_imm(MO_32 + is_double, imm8);
5245
5246     tcg_res = tcg_const_i64(imm);
5247     write_fp_dreg(s, rd, tcg_res);
5248     tcg_temp_free_i64(tcg_res);
5249 }
5250
5251 /* Handle floating point <=> fixed point conversions. Note that we can
5252  * also deal with fp <=> integer conversions as a special case (scale == 64)
5253  * OPTME: consider handling that special case specially or at least skipping
5254  * the call to scalbn in the helpers for zero shifts.
5255  */
5256 static void handle_fpfpcvt(DisasContext *s, int rd, int rn, int opcode,
5257                            bool itof, int rmode, int scale, int sf, int type)
5258 {
5259     bool is_signed = !(opcode & 1);
5260     bool is_double = type;
5261     TCGv_ptr tcg_fpstatus;
5262     TCGv_i32 tcg_shift;
5263
5264     tcg_fpstatus = get_fpstatus_ptr(false);
5265
5266     tcg_shift = tcg_const_i32(64 - scale);
5267
5268     if (itof) {
5269         TCGv_i64 tcg_int = cpu_reg(s, rn);
5270         if (!sf) {
5271             TCGv_i64 tcg_extend = new_tmp_a64(s);
5272
5273             if (is_signed) {
5274                 tcg_gen_ext32s_i64(tcg_extend, tcg_int);
5275             } else {
5276                 tcg_gen_ext32u_i64(tcg_extend, tcg_int);
5277             }
5278
5279             tcg_int = tcg_extend;
5280         }
5281
5282         if (is_double) {
5283             TCGv_i64 tcg_double = tcg_temp_new_i64();
5284             if (is_signed) {
5285                 gen_helper_vfp_sqtod(tcg_double, tcg_int,
5286                                      tcg_shift, tcg_fpstatus);
5287             } else {
5288                 gen_helper_vfp_uqtod(tcg_double, tcg_int,
5289                                      tcg_shift, tcg_fpstatus);
5290             }
5291             write_fp_dreg(s, rd, tcg_double);
5292             tcg_temp_free_i64(tcg_double);
5293         } else {
5294             TCGv_i32 tcg_single = tcg_temp_new_i32();
5295             if (is_signed) {
5296                 gen_helper_vfp_sqtos(tcg_single, tcg_int,
5297                                      tcg_shift, tcg_fpstatus);
5298             } else {
5299                 gen_helper_vfp_uqtos(tcg_single, tcg_int,
5300                                      tcg_shift, tcg_fpstatus);
5301             }
5302             write_fp_sreg(s, rd, tcg_single);
5303             tcg_temp_free_i32(tcg_single);
5304         }
5305     } else {
5306         TCGv_i64 tcg_int = cpu_reg(s, rd);
5307         TCGv_i32 tcg_rmode;
5308
5309         if (extract32(opcode, 2, 1)) {
5310             /* There are too many rounding modes to all fit into rmode,
5311              * so FCVTA[US] is a special case.
5312              */
5313             rmode = FPROUNDING_TIEAWAY;
5314         }
5315
5316         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
5317
5318         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
5319
5320         if (is_double) {
5321             TCGv_i64 tcg_double = read_fp_dreg(s, rn);
5322             if (is_signed) {
5323                 if (!sf) {
5324                     gen_helper_vfp_tosld(tcg_int, tcg_double,
5325                                          tcg_shift, tcg_fpstatus);
5326                 } else {
5327                     gen_helper_vfp_tosqd(tcg_int, tcg_double,
5328                                          tcg_shift, tcg_fpstatus);
5329                 }
5330             } else {
5331                 if (!sf) {
5332                     gen_helper_vfp_tould(tcg_int, tcg_double,
5333                                          tcg_shift, tcg_fpstatus);
5334                 } else {
5335                     gen_helper_vfp_touqd(tcg_int, tcg_double,
5336                                          tcg_shift, tcg_fpstatus);
5337                 }
5338             }
5339             tcg_temp_free_i64(tcg_double);
5340         } else {
5341             TCGv_i32 tcg_single = read_fp_sreg(s, rn);
5342             if (sf) {
5343                 if (is_signed) {
5344                     gen_helper_vfp_tosqs(tcg_int, tcg_single,
5345                                          tcg_shift, tcg_fpstatus);
5346                 } else {
5347                     gen_helper_vfp_touqs(tcg_int, tcg_single,
5348                                          tcg_shift, tcg_fpstatus);
5349                 }
5350             } else {
5351                 TCGv_i32 tcg_dest = tcg_temp_new_i32();
5352                 if (is_signed) {
5353                     gen_helper_vfp_tosls(tcg_dest, tcg_single,
5354                                          tcg_shift, tcg_fpstatus);
5355                 } else {
5356                     gen_helper_vfp_touls(tcg_dest, tcg_single,
5357                                          tcg_shift, tcg_fpstatus);
5358                 }
5359                 tcg_gen_extu_i32_i64(tcg_int, tcg_dest);
5360                 tcg_temp_free_i32(tcg_dest);
5361             }
5362             tcg_temp_free_i32(tcg_single);
5363         }
5364
5365         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
5366         tcg_temp_free_i32(tcg_rmode);
5367
5368         if (!sf) {
5369             tcg_gen_ext32u_i64(tcg_int, tcg_int);
5370         }
5371     }
5372
5373     tcg_temp_free_ptr(tcg_fpstatus);
5374     tcg_temp_free_i32(tcg_shift);
5375 }
5376
5377 /* Floating point <-> fixed point conversions
5378  *   31   30  29 28       24 23  22  21 20   19 18    16 15   10 9    5 4    0
5379  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5380  * | sf | 0 | S | 1 1 1 1 0 | type | 0 | rmode | opcode | scale |  Rn  |  Rd  |
5381  * +----+---+---+-----------+------+---+-------+--------+-------+------+------+
5382  */
5383 static void disas_fp_fixed_conv(DisasContext *s, uint32_t insn)
5384 {
5385     int rd = extract32(insn, 0, 5);
5386     int rn = extract32(insn, 5, 5);
5387     int scale = extract32(insn, 10, 6);
5388     int opcode = extract32(insn, 16, 3);
5389     int rmode = extract32(insn, 19, 2);
5390     int type = extract32(insn, 22, 2);
5391     bool sbit = extract32(insn, 29, 1);
5392     bool sf = extract32(insn, 31, 1);
5393     bool itof;
5394
5395     if (sbit || (type > 1)
5396         || (!sf && scale < 32)) {
5397         unallocated_encoding(s);
5398         return;
5399     }
5400
5401     switch ((rmode << 3) | opcode) {
5402     case 0x2: /* SCVTF */
5403     case 0x3: /* UCVTF */
5404         itof = true;
5405         break;
5406     case 0x18: /* FCVTZS */
5407     case 0x19: /* FCVTZU */
5408         itof = false;
5409         break;
5410     default:
5411         unallocated_encoding(s);
5412         return;
5413     }
5414
5415     if (!fp_access_check(s)) {
5416         return;
5417     }
5418
5419     handle_fpfpcvt(s, rd, rn, opcode, itof, FPROUNDING_ZERO, scale, sf, type);
5420 }
5421
5422 static void handle_fmov(DisasContext *s, int rd, int rn, int type, bool itof)
5423 {
5424     /* FMOV: gpr to or from float, double, or top half of quad fp reg,
5425      * without conversion.
5426      */
5427
5428     if (itof) {
5429         TCGv_i64 tcg_rn = cpu_reg(s, rn);
5430
5431         switch (type) {
5432         case 0:
5433         {
5434             /* 32 bit */
5435             TCGv_i64 tmp = tcg_temp_new_i64();
5436             tcg_gen_ext32u_i64(tmp, tcg_rn);
5437             tcg_gen_st_i64(tmp, cpu_env, fp_reg_offset(s, rd, MO_64));
5438             tcg_gen_movi_i64(tmp, 0);
5439             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5440             tcg_temp_free_i64(tmp);
5441             break;
5442         }
5443         case 1:
5444         {
5445             /* 64 bit */
5446             TCGv_i64 tmp = tcg_const_i64(0);
5447             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_offset(s, rd, MO_64));
5448             tcg_gen_st_i64(tmp, cpu_env, fp_reg_hi_offset(s, rd));
5449             tcg_temp_free_i64(tmp);
5450             break;
5451         }
5452         case 2:
5453             /* 64 bit to top half. */
5454             tcg_gen_st_i64(tcg_rn, cpu_env, fp_reg_hi_offset(s, rd));
5455             break;
5456         }
5457     } else {
5458         TCGv_i64 tcg_rd = cpu_reg(s, rd);
5459
5460         switch (type) {
5461         case 0:
5462             /* 32 bit */
5463             tcg_gen_ld32u_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_32));
5464             break;
5465         case 1:
5466             /* 64 bit */
5467             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_offset(s, rn, MO_64));
5468             break;
5469         case 2:
5470             /* 64 bits from top half */
5471             tcg_gen_ld_i64(tcg_rd, cpu_env, fp_reg_hi_offset(s, rn));
5472             break;
5473         }
5474     }
5475 }
5476
5477 /* Floating point <-> integer conversions
5478  *   31   30  29 28       24 23  22  21 20   19 18 16 15         10 9  5 4  0
5479  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5480  * | sf | 0 | S | 1 1 1 1 0 | type | 1 | rmode | opc | 0 0 0 0 0 0 | Rn | Rd |
5481  * +----+---+---+-----------+------+---+-------+-----+-------------+----+----+
5482  */
5483 static void disas_fp_int_conv(DisasContext *s, uint32_t insn)
5484 {
5485     int rd = extract32(insn, 0, 5);
5486     int rn = extract32(insn, 5, 5);
5487     int opcode = extract32(insn, 16, 3);
5488     int rmode = extract32(insn, 19, 2);
5489     int type = extract32(insn, 22, 2);
5490     bool sbit = extract32(insn, 29, 1);
5491     bool sf = extract32(insn, 31, 1);
5492
5493     if (sbit) {
5494         unallocated_encoding(s);
5495         return;
5496     }
5497
5498     if (opcode > 5) {
5499         /* FMOV */
5500         bool itof = opcode & 1;
5501
5502         if (rmode >= 2) {
5503             unallocated_encoding(s);
5504             return;
5505         }
5506
5507         switch (sf << 3 | type << 1 | rmode) {
5508         case 0x0: /* 32 bit */
5509         case 0xa: /* 64 bit */
5510         case 0xd: /* 64 bit to top half of quad */
5511             break;
5512         default:
5513             /* all other sf/type/rmode combinations are invalid */
5514             unallocated_encoding(s);
5515             break;
5516         }
5517
5518         if (!fp_access_check(s)) {
5519             return;
5520         }
5521         handle_fmov(s, rd, rn, type, itof);
5522     } else {
5523         /* actual FP conversions */
5524         bool itof = extract32(opcode, 1, 1);
5525
5526         if (type > 1 || (rmode != 0 && opcode > 1)) {
5527             unallocated_encoding(s);
5528             return;
5529         }
5530
5531         if (!fp_access_check(s)) {
5532             return;
5533         }
5534         handle_fpfpcvt(s, rd, rn, opcode, itof, rmode, 64, sf, type);
5535     }
5536 }
5537
5538 /* FP-specific subcases of table C3-6 (SIMD and FP data processing)
5539  *   31  30  29 28     25 24                          0
5540  * +---+---+---+---------+-----------------------------+
5541  * |   | 0 |   | 1 1 1 1 |                             |
5542  * +---+---+---+---------+-----------------------------+
5543  */
5544 static void disas_data_proc_fp(DisasContext *s, uint32_t insn)
5545 {
5546     if (extract32(insn, 24, 1)) {
5547         /* Floating point data-processing (3 source) */
5548         disas_fp_3src(s, insn);
5549     } else if (extract32(insn, 21, 1) == 0) {
5550         /* Floating point to fixed point conversions */
5551         disas_fp_fixed_conv(s, insn);
5552     } else {
5553         switch (extract32(insn, 10, 2)) {
5554         case 1:
5555             /* Floating point conditional compare */
5556             disas_fp_ccomp(s, insn);
5557             break;
5558         case 2:
5559             /* Floating point data-processing (2 source) */
5560             disas_fp_2src(s, insn);
5561             break;
5562         case 3:
5563             /* Floating point conditional select */
5564             disas_fp_csel(s, insn);
5565             break;
5566         case 0:
5567             switch (ctz32(extract32(insn, 12, 4))) {
5568             case 0: /* [15:12] == xxx1 */
5569                 /* Floating point immediate */
5570                 disas_fp_imm(s, insn);
5571                 break;
5572             case 1: /* [15:12] == xx10 */
5573                 /* Floating point compare */
5574                 disas_fp_compare(s, insn);
5575                 break;
5576             case 2: /* [15:12] == x100 */
5577                 /* Floating point data-processing (1 source) */
5578                 disas_fp_1src(s, insn);
5579                 break;
5580             case 3: /* [15:12] == 1000 */
5581                 unallocated_encoding(s);
5582                 break;
5583             default: /* [15:12] == 0000 */
5584                 /* Floating point <-> integer conversions */
5585                 disas_fp_int_conv(s, insn);
5586                 break;
5587             }
5588             break;
5589         }
5590     }
5591 }
5592
5593 static void do_ext64(DisasContext *s, TCGv_i64 tcg_left, TCGv_i64 tcg_right,
5594                      int pos)
5595 {
5596     /* Extract 64 bits from the middle of two concatenated 64 bit
5597      * vector register slices left:right. The extracted bits start
5598      * at 'pos' bits into the right (least significant) side.
5599      * We return the result in tcg_right, and guarantee not to
5600      * trash tcg_left.
5601      */
5602     TCGv_i64 tcg_tmp = tcg_temp_new_i64();
5603     assert(pos > 0 && pos < 64);
5604
5605     tcg_gen_shri_i64(tcg_right, tcg_right, pos);
5606     tcg_gen_shli_i64(tcg_tmp, tcg_left, 64 - pos);
5607     tcg_gen_or_i64(tcg_right, tcg_right, tcg_tmp);
5608
5609     tcg_temp_free_i64(tcg_tmp);
5610 }
5611
5612 /* EXT
5613  *   31  30 29         24 23 22  21 20  16 15  14  11 10  9    5 4    0
5614  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5615  * | 0 | Q | 1 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | imm4 | 0 |  Rn  |  Rd  |
5616  * +---+---+-------------+-----+---+------+---+------+---+------+------+
5617  */
5618 static void disas_simd_ext(DisasContext *s, uint32_t insn)
5619 {
5620     int is_q = extract32(insn, 30, 1);
5621     int op2 = extract32(insn, 22, 2);
5622     int imm4 = extract32(insn, 11, 4);
5623     int rm = extract32(insn, 16, 5);
5624     int rn = extract32(insn, 5, 5);
5625     int rd = extract32(insn, 0, 5);
5626     int pos = imm4 << 3;
5627     TCGv_i64 tcg_resl, tcg_resh;
5628
5629     if (op2 != 0 || (!is_q && extract32(imm4, 3, 1))) {
5630         unallocated_encoding(s);
5631         return;
5632     }
5633
5634     if (!fp_access_check(s)) {
5635         return;
5636     }
5637
5638     tcg_resh = tcg_temp_new_i64();
5639     tcg_resl = tcg_temp_new_i64();
5640
5641     /* Vd gets bits starting at pos bits into Vm:Vn. This is
5642      * either extracting 128 bits from a 128:128 concatenation, or
5643      * extracting 64 bits from a 64:64 concatenation.
5644      */
5645     if (!is_q) {
5646         read_vec_element(s, tcg_resl, rn, 0, MO_64);
5647         if (pos != 0) {
5648             read_vec_element(s, tcg_resh, rm, 0, MO_64);
5649             do_ext64(s, tcg_resh, tcg_resl, pos);
5650         }
5651         tcg_gen_movi_i64(tcg_resh, 0);
5652     } else {
5653         TCGv_i64 tcg_hh;
5654         typedef struct {
5655             int reg;
5656             int elt;
5657         } EltPosns;
5658         EltPosns eltposns[] = { {rn, 0}, {rn, 1}, {rm, 0}, {rm, 1} };
5659         EltPosns *elt = eltposns;
5660
5661         if (pos >= 64) {
5662             elt++;
5663             pos -= 64;
5664         }
5665
5666         read_vec_element(s, tcg_resl, elt->reg, elt->elt, MO_64);
5667         elt++;
5668         read_vec_element(s, tcg_resh, elt->reg, elt->elt, MO_64);
5669         elt++;
5670         if (pos != 0) {
5671             do_ext64(s, tcg_resh, tcg_resl, pos);
5672             tcg_hh = tcg_temp_new_i64();
5673             read_vec_element(s, tcg_hh, elt->reg, elt->elt, MO_64);
5674             do_ext64(s, tcg_hh, tcg_resh, pos);
5675             tcg_temp_free_i64(tcg_hh);
5676         }
5677     }
5678
5679     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5680     tcg_temp_free_i64(tcg_resl);
5681     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5682     tcg_temp_free_i64(tcg_resh);
5683 }
5684
5685 /* TBL/TBX
5686  *   31  30 29         24 23 22  21 20  16 15  14 13  12  11 10 9    5 4    0
5687  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5688  * | 0 | Q | 0 0 1 1 1 0 | op2 | 0 |  Rm  | 0 | len | op | 0 0 |  Rn  |  Rd  |
5689  * +---+---+-------------+-----+---+------+---+-----+----+-----+------+------+
5690  */
5691 static void disas_simd_tb(DisasContext *s, uint32_t insn)
5692 {
5693     int op2 = extract32(insn, 22, 2);
5694     int is_q = extract32(insn, 30, 1);
5695     int rm = extract32(insn, 16, 5);
5696     int rn = extract32(insn, 5, 5);
5697     int rd = extract32(insn, 0, 5);
5698     int is_tblx = extract32(insn, 12, 1);
5699     int len = extract32(insn, 13, 2);
5700     TCGv_i64 tcg_resl, tcg_resh, tcg_idx;
5701     TCGv_i32 tcg_regno, tcg_numregs;
5702
5703     if (op2 != 0) {
5704         unallocated_encoding(s);
5705         return;
5706     }
5707
5708     if (!fp_access_check(s)) {
5709         return;
5710     }
5711
5712     /* This does a table lookup: for every byte element in the input
5713      * we index into a table formed from up to four vector registers,
5714      * and then the output is the result of the lookups. Our helper
5715      * function does the lookup operation for a single 64 bit part of
5716      * the input.
5717      */
5718     tcg_resl = tcg_temp_new_i64();
5719     tcg_resh = tcg_temp_new_i64();
5720
5721     if (is_tblx) {
5722         read_vec_element(s, tcg_resl, rd, 0, MO_64);
5723     } else {
5724         tcg_gen_movi_i64(tcg_resl, 0);
5725     }
5726     if (is_tblx && is_q) {
5727         read_vec_element(s, tcg_resh, rd, 1, MO_64);
5728     } else {
5729         tcg_gen_movi_i64(tcg_resh, 0);
5730     }
5731
5732     tcg_idx = tcg_temp_new_i64();
5733     tcg_regno = tcg_const_i32(rn);
5734     tcg_numregs = tcg_const_i32(len + 1);
5735     read_vec_element(s, tcg_idx, rm, 0, MO_64);
5736     gen_helper_simd_tbl(tcg_resl, cpu_env, tcg_resl, tcg_idx,
5737                         tcg_regno, tcg_numregs);
5738     if (is_q) {
5739         read_vec_element(s, tcg_idx, rm, 1, MO_64);
5740         gen_helper_simd_tbl(tcg_resh, cpu_env, tcg_resh, tcg_idx,
5741                             tcg_regno, tcg_numregs);
5742     }
5743     tcg_temp_free_i64(tcg_idx);
5744     tcg_temp_free_i32(tcg_regno);
5745     tcg_temp_free_i32(tcg_numregs);
5746
5747     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5748     tcg_temp_free_i64(tcg_resl);
5749     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5750     tcg_temp_free_i64(tcg_resh);
5751 }
5752
5753 /* ZIP/UZP/TRN
5754  *   31  30 29         24 23  22  21 20   16 15 14 12 11 10 9    5 4    0
5755  * +---+---+-------------+------+---+------+---+------------------+------+
5756  * | 0 | Q | 0 0 1 1 1 0 | size | 0 |  Rm  | 0 | opc | 1 0 |  Rn  |  Rd  |
5757  * +---+---+-------------+------+---+------+---+------------------+------+
5758  */
5759 static void disas_simd_zip_trn(DisasContext *s, uint32_t insn)
5760 {
5761     int rd = extract32(insn, 0, 5);
5762     int rn = extract32(insn, 5, 5);
5763     int rm = extract32(insn, 16, 5);
5764     int size = extract32(insn, 22, 2);
5765     /* opc field bits [1:0] indicate ZIP/UZP/TRN;
5766      * bit 2 indicates 1 vs 2 variant of the insn.
5767      */
5768     int opcode = extract32(insn, 12, 2);
5769     bool part = extract32(insn, 14, 1);
5770     bool is_q = extract32(insn, 30, 1);
5771     int esize = 8 << size;
5772     int i, ofs;
5773     int datasize = is_q ? 128 : 64;
5774     int elements = datasize / esize;
5775     TCGv_i64 tcg_res, tcg_resl, tcg_resh;
5776
5777     if (opcode == 0 || (size == 3 && !is_q)) {
5778         unallocated_encoding(s);
5779         return;
5780     }
5781
5782     if (!fp_access_check(s)) {
5783         return;
5784     }
5785
5786     tcg_resl = tcg_const_i64(0);
5787     tcg_resh = tcg_const_i64(0);
5788     tcg_res = tcg_temp_new_i64();
5789
5790     for (i = 0; i < elements; i++) {
5791         switch (opcode) {
5792         case 1: /* UZP1/2 */
5793         {
5794             int midpoint = elements / 2;
5795             if (i < midpoint) {
5796                 read_vec_element(s, tcg_res, rn, 2 * i + part, size);
5797             } else {
5798                 read_vec_element(s, tcg_res, rm,
5799                                  2 * (i - midpoint) + part, size);
5800             }
5801             break;
5802         }
5803         case 2: /* TRN1/2 */
5804             if (i & 1) {
5805                 read_vec_element(s, tcg_res, rm, (i & ~1) + part, size);
5806             } else {
5807                 read_vec_element(s, tcg_res, rn, (i & ~1) + part, size);
5808             }
5809             break;
5810         case 3: /* ZIP1/2 */
5811         {
5812             int base = part * elements / 2;
5813             if (i & 1) {
5814                 read_vec_element(s, tcg_res, rm, base + (i >> 1), size);
5815             } else {
5816                 read_vec_element(s, tcg_res, rn, base + (i >> 1), size);
5817             }
5818             break;
5819         }
5820         default:
5821             g_assert_not_reached();
5822         }
5823
5824         ofs = i * esize;
5825         if (ofs < 64) {
5826             tcg_gen_shli_i64(tcg_res, tcg_res, ofs);
5827             tcg_gen_or_i64(tcg_resl, tcg_resl, tcg_res);
5828         } else {
5829             tcg_gen_shli_i64(tcg_res, tcg_res, ofs - 64);
5830             tcg_gen_or_i64(tcg_resh, tcg_resh, tcg_res);
5831         }
5832     }
5833
5834     tcg_temp_free_i64(tcg_res);
5835
5836     write_vec_element(s, tcg_resl, rd, 0, MO_64);
5837     tcg_temp_free_i64(tcg_resl);
5838     write_vec_element(s, tcg_resh, rd, 1, MO_64);
5839     tcg_temp_free_i64(tcg_resh);
5840 }
5841
5842 /*
5843  * do_reduction_op helper
5844  *
5845  * This mirrors the Reduce() pseudocode in the ARM ARM. It is
5846  * important for correct NaN propagation that we do these
5847  * operations in exactly the order specified by the pseudocode.
5848  *
5849  * This is a recursive function, TCG temps should be freed by the
5850  * calling function once it is done with the values.
5851  */
5852 static TCGv_i32 do_reduction_op(DisasContext *s, int fpopcode, int rn,
5853                                 int esize, int size, int vmap, TCGv_ptr fpst)
5854 {
5855     if (esize == size) {
5856         int element;
5857         TCGMemOp msize = esize == 16 ? MO_16 : MO_32;
5858         TCGv_i32 tcg_elem;
5859
5860         /* We should have one register left here */
5861         assert(ctpop8(vmap) == 1);
5862         element = ctz32(vmap);
5863         assert(element < 8);
5864
5865         tcg_elem = tcg_temp_new_i32();
5866         read_vec_element_i32(s, tcg_elem, rn, element, msize);
5867         return tcg_elem;
5868     } else {
5869         int bits = size / 2;
5870         int shift = ctpop8(vmap) / 2;
5871         int vmap_lo = (vmap >> shift) & vmap;
5872         int vmap_hi = (vmap & ~vmap_lo);
5873         TCGv_i32 tcg_hi, tcg_lo, tcg_res;
5874
5875         tcg_hi = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_hi, fpst);
5876         tcg_lo = do_reduction_op(s, fpopcode, rn, esize, bits, vmap_lo, fpst);
5877         tcg_res = tcg_temp_new_i32();
5878
5879         switch (fpopcode) {
5880         case 0x0c: /* fmaxnmv half-precision */
5881             gen_helper_advsimd_maxnumh(tcg_res, tcg_lo, tcg_hi, fpst);
5882             break;
5883         case 0x0f: /* fmaxv half-precision */
5884             gen_helper_advsimd_maxh(tcg_res, tcg_lo, tcg_hi, fpst);
5885             break;
5886         case 0x1c: /* fminnmv half-precision */
5887             gen_helper_advsimd_minnumh(tcg_res, tcg_lo, tcg_hi, fpst);
5888             break;
5889         case 0x1f: /* fminv half-precision */
5890             gen_helper_advsimd_minh(tcg_res, tcg_lo, tcg_hi, fpst);
5891             break;
5892         case 0x2c: /* fmaxnmv */
5893             gen_helper_vfp_maxnums(tcg_res, tcg_lo, tcg_hi, fpst);
5894             break;
5895         case 0x2f: /* fmaxv */
5896             gen_helper_vfp_maxs(tcg_res, tcg_lo, tcg_hi, fpst);
5897             break;
5898         case 0x3c: /* fminnmv */
5899             gen_helper_vfp_minnums(tcg_res, tcg_lo, tcg_hi, fpst);
5900             break;
5901         case 0x3f: /* fminv */
5902             gen_helper_vfp_mins(tcg_res, tcg_lo, tcg_hi, fpst);
5903             break;
5904         default:
5905             g_assert_not_reached();
5906         }
5907
5908         tcg_temp_free_i32(tcg_hi);
5909         tcg_temp_free_i32(tcg_lo);
5910         return tcg_res;
5911     }
5912 }
5913
5914 /* AdvSIMD across lanes
5915  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
5916  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5917  * | 0 | Q | U | 0 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
5918  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
5919  */
5920 static void disas_simd_across_lanes(DisasContext *s, uint32_t insn)
5921 {
5922     int rd = extract32(insn, 0, 5);
5923     int rn = extract32(insn, 5, 5);
5924     int size = extract32(insn, 22, 2);
5925     int opcode = extract32(insn, 12, 5);
5926     bool is_q = extract32(insn, 30, 1);
5927     bool is_u = extract32(insn, 29, 1);
5928     bool is_fp = false;
5929     bool is_min = false;
5930     int esize;
5931     int elements;
5932     int i;
5933     TCGv_i64 tcg_res, tcg_elt;
5934
5935     switch (opcode) {
5936     case 0x1b: /* ADDV */
5937         if (is_u) {
5938             unallocated_encoding(s);
5939             return;
5940         }
5941         /* fall through */
5942     case 0x3: /* SADDLV, UADDLV */
5943     case 0xa: /* SMAXV, UMAXV */
5944     case 0x1a: /* SMINV, UMINV */
5945         if (size == 3 || (size == 2 && !is_q)) {
5946             unallocated_encoding(s);
5947             return;
5948         }
5949         break;
5950     case 0xc: /* FMAXNMV, FMINNMV */
5951     case 0xf: /* FMAXV, FMINV */
5952         /* Bit 1 of size field encodes min vs max and the actual size
5953          * depends on the encoding of the U bit. If not set (and FP16
5954          * enabled) then we do half-precision float instead of single
5955          * precision.
5956          */
5957         is_min = extract32(size, 1, 1);
5958         is_fp = true;
5959         if (!is_u && arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
5960             size = 1;
5961         } else if (!is_u || !is_q || extract32(size, 0, 1)) {
5962             unallocated_encoding(s);
5963             return;
5964         } else {
5965             size = 2;
5966         }
5967         break;
5968     default:
5969         unallocated_encoding(s);
5970         return;
5971     }
5972
5973     if (!fp_access_check(s)) {
5974         return;
5975     }
5976
5977     esize = 8 << size;
5978     elements = (is_q ? 128 : 64) / esize;
5979
5980     tcg_res = tcg_temp_new_i64();
5981     tcg_elt = tcg_temp_new_i64();
5982
5983     /* These instructions operate across all lanes of a vector
5984      * to produce a single result. We can guarantee that a 64
5985      * bit intermediate is sufficient:
5986      *  + for [US]ADDLV the maximum element size is 32 bits, and
5987      *    the result type is 64 bits
5988      *  + for FMAX*V, FMIN*V, ADDV the intermediate type is the
5989      *    same as the element size, which is 32 bits at most
5990      * For the integer operations we can choose to work at 64
5991      * or 32 bits and truncate at the end; for simplicity
5992      * we use 64 bits always. The floating point
5993      * ops do require 32 bit intermediates, though.
5994      */
5995     if (!is_fp) {
5996         read_vec_element(s, tcg_res, rn, 0, size | (is_u ? 0 : MO_SIGN));
5997
5998         for (i = 1; i < elements; i++) {
5999             read_vec_element(s, tcg_elt, rn, i, size | (is_u ? 0 : MO_SIGN));
6000
6001             switch (opcode) {
6002             case 0x03: /* SADDLV / UADDLV */
6003             case 0x1b: /* ADDV */
6004                 tcg_gen_add_i64(tcg_res, tcg_res, tcg_elt);
6005                 break;
6006             case 0x0a: /* SMAXV / UMAXV */
6007                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
6008                                     tcg_res,
6009                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
6010                 break;
6011             case 0x1a: /* SMINV / UMINV */
6012                 tcg_gen_movcond_i64(is_u ? TCG_COND_LEU : TCG_COND_LE,
6013                                     tcg_res,
6014                                     tcg_res, tcg_elt, tcg_res, tcg_elt);
6015                 break;
6016                 break;
6017             default:
6018                 g_assert_not_reached();
6019             }
6020
6021         }
6022     } else {
6023         /* Floating point vector reduction ops which work across 32
6024          * bit (single) or 16 bit (half-precision) intermediates.
6025          * Note that correct NaN propagation requires that we do these
6026          * operations in exactly the order specified by the pseudocode.
6027          */
6028         TCGv_ptr fpst = get_fpstatus_ptr(size == MO_16);
6029         int fpopcode = opcode | is_min << 4 | is_u << 5;
6030         int vmap = (1 << elements) - 1;
6031         TCGv_i32 tcg_res32 = do_reduction_op(s, fpopcode, rn, esize,
6032                                              (is_q ? 128 : 64), vmap, fpst);
6033         tcg_gen_extu_i32_i64(tcg_res, tcg_res32);
6034         tcg_temp_free_i32(tcg_res32);
6035         tcg_temp_free_ptr(fpst);
6036     }
6037
6038     tcg_temp_free_i64(tcg_elt);
6039
6040     /* Now truncate the result to the width required for the final output */
6041     if (opcode == 0x03) {
6042         /* SADDLV, UADDLV: result is 2*esize */
6043         size++;
6044     }
6045
6046     switch (size) {
6047     case 0:
6048         tcg_gen_ext8u_i64(tcg_res, tcg_res);
6049         break;
6050     case 1:
6051         tcg_gen_ext16u_i64(tcg_res, tcg_res);
6052         break;
6053     case 2:
6054         tcg_gen_ext32u_i64(tcg_res, tcg_res);
6055         break;
6056     case 3:
6057         break;
6058     default:
6059         g_assert_not_reached();
6060     }
6061
6062     write_fp_dreg(s, rd, tcg_res);
6063     tcg_temp_free_i64(tcg_res);
6064 }
6065
6066 /* DUP (Element, Vector)
6067  *
6068  *  31  30   29              21 20    16 15        10  9    5 4    0
6069  * +---+---+-------------------+--------+-------------+------+------+
6070  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
6071  * +---+---+-------------------+--------+-------------+------+------+
6072  *
6073  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6074  */
6075 static void handle_simd_dupe(DisasContext *s, int is_q, int rd, int rn,
6076                              int imm5)
6077 {
6078     int size = ctz32(imm5);
6079     int index = imm5 >> (size + 1);
6080
6081     if (size > 3 || (size == 3 && !is_q)) {
6082         unallocated_encoding(s);
6083         return;
6084     }
6085
6086     if (!fp_access_check(s)) {
6087         return;
6088     }
6089
6090     tcg_gen_gvec_dup_mem(size, vec_full_reg_offset(s, rd),
6091                          vec_reg_offset(s, rn, index, size),
6092                          is_q ? 16 : 8, vec_full_reg_size(s));
6093 }
6094
6095 /* DUP (element, scalar)
6096  *  31                   21 20    16 15        10  9    5 4    0
6097  * +-----------------------+--------+-------------+------+------+
6098  * | 0 1 0 1 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 0 1 |  Rn  |  Rd  |
6099  * +-----------------------+--------+-------------+------+------+
6100  */
6101 static void handle_simd_dupes(DisasContext *s, int rd, int rn,
6102                               int imm5)
6103 {
6104     int size = ctz32(imm5);
6105     int index;
6106     TCGv_i64 tmp;
6107
6108     if (size > 3) {
6109         unallocated_encoding(s);
6110         return;
6111     }
6112
6113     if (!fp_access_check(s)) {
6114         return;
6115     }
6116
6117     index = imm5 >> (size + 1);
6118
6119     /* This instruction just extracts the specified element and
6120      * zero-extends it into the bottom of the destination register.
6121      */
6122     tmp = tcg_temp_new_i64();
6123     read_vec_element(s, tmp, rn, index, size);
6124     write_fp_dreg(s, rd, tmp);
6125     tcg_temp_free_i64(tmp);
6126 }
6127
6128 /* DUP (General)
6129  *
6130  *  31  30   29              21 20    16 15        10  9    5 4    0
6131  * +---+---+-------------------+--------+-------------+------+------+
6132  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 0 1 1 |  Rn  |  Rd  |
6133  * +---+---+-------------------+--------+-------------+------+------+
6134  *
6135  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6136  */
6137 static void handle_simd_dupg(DisasContext *s, int is_q, int rd, int rn,
6138                              int imm5)
6139 {
6140     int size = ctz32(imm5);
6141     uint32_t dofs, oprsz, maxsz;
6142
6143     if (size > 3 || ((size == 3) && !is_q)) {
6144         unallocated_encoding(s);
6145         return;
6146     }
6147
6148     if (!fp_access_check(s)) {
6149         return;
6150     }
6151
6152     dofs = vec_full_reg_offset(s, rd);
6153     oprsz = is_q ? 16 : 8;
6154     maxsz = vec_full_reg_size(s);
6155
6156     tcg_gen_gvec_dup_i64(size, dofs, oprsz, maxsz, cpu_reg(s, rn));
6157 }
6158
6159 /* INS (Element)
6160  *
6161  *  31                   21 20    16 15  14    11  10 9    5 4    0
6162  * +-----------------------+--------+------------+---+------+------+
6163  * | 0 1 1 0 1 1 1 0 0 0 0 |  imm5  | 0 |  imm4  | 1 |  Rn  |  Rd  |
6164  * +-----------------------+--------+------------+---+------+------+
6165  *
6166  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6167  * index: encoded in imm5<4:size+1>
6168  */
6169 static void handle_simd_inse(DisasContext *s, int rd, int rn,
6170                              int imm4, int imm5)
6171 {
6172     int size = ctz32(imm5);
6173     int src_index, dst_index;
6174     TCGv_i64 tmp;
6175
6176     if (size > 3) {
6177         unallocated_encoding(s);
6178         return;
6179     }
6180
6181     if (!fp_access_check(s)) {
6182         return;
6183     }
6184
6185     dst_index = extract32(imm5, 1+size, 5);
6186     src_index = extract32(imm4, size, 4);
6187
6188     tmp = tcg_temp_new_i64();
6189
6190     read_vec_element(s, tmp, rn, src_index, size);
6191     write_vec_element(s, tmp, rd, dst_index, size);
6192
6193     tcg_temp_free_i64(tmp);
6194 }
6195
6196
6197 /* INS (General)
6198  *
6199  *  31                   21 20    16 15        10  9    5 4    0
6200  * +-----------------------+--------+-------------+------+------+
6201  * | 0 1 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 0 1 1 1 |  Rn  |  Rd  |
6202  * +-----------------------+--------+-------------+------+------+
6203  *
6204  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6205  * index: encoded in imm5<4:size+1>
6206  */
6207 static void handle_simd_insg(DisasContext *s, int rd, int rn, int imm5)
6208 {
6209     int size = ctz32(imm5);
6210     int idx;
6211
6212     if (size > 3) {
6213         unallocated_encoding(s);
6214         return;
6215     }
6216
6217     if (!fp_access_check(s)) {
6218         return;
6219     }
6220
6221     idx = extract32(imm5, 1 + size, 4 - size);
6222     write_vec_element(s, cpu_reg(s, rn), rd, idx, size);
6223 }
6224
6225 /*
6226  * UMOV (General)
6227  * SMOV (General)
6228  *
6229  *  31  30   29              21 20    16 15    12   10 9    5 4    0
6230  * +---+---+-------------------+--------+-------------+------+------+
6231  * | 0 | Q | 0 0 1 1 1 0 0 0 0 |  imm5  | 0 0 1 U 1 1 |  Rn  |  Rd  |
6232  * +---+---+-------------------+--------+-------------+------+------+
6233  *
6234  * U: unsigned when set
6235  * size: encoded in imm5 (see ARM ARM LowestSetBit())
6236  */
6237 static void handle_simd_umov_smov(DisasContext *s, int is_q, int is_signed,
6238                                   int rn, int rd, int imm5)
6239 {
6240     int size = ctz32(imm5);
6241     int element;
6242     TCGv_i64 tcg_rd;
6243
6244     /* Check for UnallocatedEncodings */
6245     if (is_signed) {
6246         if (size > 2 || (size == 2 && !is_q)) {
6247             unallocated_encoding(s);
6248             return;
6249         }
6250     } else {
6251         if (size > 3
6252             || (size < 3 && is_q)
6253             || (size == 3 && !is_q)) {
6254             unallocated_encoding(s);
6255             return;
6256         }
6257     }
6258
6259     if (!fp_access_check(s)) {
6260         return;
6261     }
6262
6263     element = extract32(imm5, 1+size, 4);
6264
6265     tcg_rd = cpu_reg(s, rd);
6266     read_vec_element(s, tcg_rd, rn, element, size | (is_signed ? MO_SIGN : 0));
6267     if (is_signed && !is_q) {
6268         tcg_gen_ext32u_i64(tcg_rd, tcg_rd);
6269     }
6270 }
6271
6272 /* AdvSIMD copy
6273  *   31  30  29  28             21 20  16 15  14  11 10  9    5 4    0
6274  * +---+---+----+-----------------+------+---+------+---+------+------+
6275  * | 0 | Q | op | 0 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6276  * +---+---+----+-----------------+------+---+------+---+------+------+
6277  */
6278 static void disas_simd_copy(DisasContext *s, uint32_t insn)
6279 {
6280     int rd = extract32(insn, 0, 5);
6281     int rn = extract32(insn, 5, 5);
6282     int imm4 = extract32(insn, 11, 4);
6283     int op = extract32(insn, 29, 1);
6284     int is_q = extract32(insn, 30, 1);
6285     int imm5 = extract32(insn, 16, 5);
6286
6287     if (op) {
6288         if (is_q) {
6289             /* INS (element) */
6290             handle_simd_inse(s, rd, rn, imm4, imm5);
6291         } else {
6292             unallocated_encoding(s);
6293         }
6294     } else {
6295         switch (imm4) {
6296         case 0:
6297             /* DUP (element - vector) */
6298             handle_simd_dupe(s, is_q, rd, rn, imm5);
6299             break;
6300         case 1:
6301             /* DUP (general) */
6302             handle_simd_dupg(s, is_q, rd, rn, imm5);
6303             break;
6304         case 3:
6305             if (is_q) {
6306                 /* INS (general) */
6307                 handle_simd_insg(s, rd, rn, imm5);
6308             } else {
6309                 unallocated_encoding(s);
6310             }
6311             break;
6312         case 5:
6313         case 7:
6314             /* UMOV/SMOV (is_q indicates 32/64; imm4 indicates signedness) */
6315             handle_simd_umov_smov(s, is_q, (imm4 == 5), rn, rd, imm5);
6316             break;
6317         default:
6318             unallocated_encoding(s);
6319             break;
6320         }
6321     }
6322 }
6323
6324 /* AdvSIMD modified immediate
6325  *  31  30   29  28                 19 18 16 15   12  11  10  9     5 4    0
6326  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6327  * | 0 | Q | op | 0 1 1 1 1 0 0 0 0 0 | abc | cmode | o2 | 1 | defgh |  Rd  |
6328  * +---+---+----+---------------------+-----+-------+----+---+-------+------+
6329  *
6330  * There are a number of operations that can be carried out here:
6331  *   MOVI - move (shifted) imm into register
6332  *   MVNI - move inverted (shifted) imm into register
6333  *   ORR  - bitwise OR of (shifted) imm with register
6334  *   BIC  - bitwise clear of (shifted) imm with register
6335  * With ARMv8.2 we also have:
6336  *   FMOV half-precision
6337  */
6338 static void disas_simd_mod_imm(DisasContext *s, uint32_t insn)
6339 {
6340     int rd = extract32(insn, 0, 5);
6341     int cmode = extract32(insn, 12, 4);
6342     int cmode_3_1 = extract32(cmode, 1, 3);
6343     int cmode_0 = extract32(cmode, 0, 1);
6344     int o2 = extract32(insn, 11, 1);
6345     uint64_t abcdefgh = extract32(insn, 5, 5) | (extract32(insn, 16, 3) << 5);
6346     bool is_neg = extract32(insn, 29, 1);
6347     bool is_q = extract32(insn, 30, 1);
6348     uint64_t imm = 0;
6349
6350     if (o2 != 0 || ((cmode == 0xf) && is_neg && !is_q)) {
6351         /* Check for FMOV (vector, immediate) - half-precision */
6352         if (!(arm_dc_feature(s, ARM_FEATURE_V8_FP16) && o2 && cmode == 0xf)) {
6353             unallocated_encoding(s);
6354             return;
6355         }
6356     }
6357
6358     if (!fp_access_check(s)) {
6359         return;
6360     }
6361
6362     /* See AdvSIMDExpandImm() in ARM ARM */
6363     switch (cmode_3_1) {
6364     case 0: /* Replicate(Zeros(24):imm8, 2) */
6365     case 1: /* Replicate(Zeros(16):imm8:Zeros(8), 2) */
6366     case 2: /* Replicate(Zeros(8):imm8:Zeros(16), 2) */
6367     case 3: /* Replicate(imm8:Zeros(24), 2) */
6368     {
6369         int shift = cmode_3_1 * 8;
6370         imm = bitfield_replicate(abcdefgh << shift, 32);
6371         break;
6372     }
6373     case 4: /* Replicate(Zeros(8):imm8, 4) */
6374     case 5: /* Replicate(imm8:Zeros(8), 4) */
6375     {
6376         int shift = (cmode_3_1 & 0x1) * 8;
6377         imm = bitfield_replicate(abcdefgh << shift, 16);
6378         break;
6379     }
6380     case 6:
6381         if (cmode_0) {
6382             /* Replicate(Zeros(8):imm8:Ones(16), 2) */
6383             imm = (abcdefgh << 16) | 0xffff;
6384         } else {
6385             /* Replicate(Zeros(16):imm8:Ones(8), 2) */
6386             imm = (abcdefgh << 8) | 0xff;
6387         }
6388         imm = bitfield_replicate(imm, 32);
6389         break;
6390     case 7:
6391         if (!cmode_0 && !is_neg) {
6392             imm = bitfield_replicate(abcdefgh, 8);
6393         } else if (!cmode_0 && is_neg) {
6394             int i;
6395             imm = 0;
6396             for (i = 0; i < 8; i++) {
6397                 if ((abcdefgh) & (1 << i)) {
6398                     imm |= 0xffULL << (i * 8);
6399                 }
6400             }
6401         } else if (cmode_0) {
6402             if (is_neg) {
6403                 imm = (abcdefgh & 0x3f) << 48;
6404                 if (abcdefgh & 0x80) {
6405                     imm |= 0x8000000000000000ULL;
6406                 }
6407                 if (abcdefgh & 0x40) {
6408                     imm |= 0x3fc0000000000000ULL;
6409                 } else {
6410                     imm |= 0x4000000000000000ULL;
6411                 }
6412             } else {
6413                 if (o2) {
6414                     /* FMOV (vector, immediate) - half-precision */
6415                     imm = vfp_expand_imm(MO_16, abcdefgh);
6416                     /* now duplicate across the lanes */
6417                     imm = bitfield_replicate(imm, 16);
6418                 } else {
6419                     imm = (abcdefgh & 0x3f) << 19;
6420                     if (abcdefgh & 0x80) {
6421                         imm |= 0x80000000;
6422                     }
6423                     if (abcdefgh & 0x40) {
6424                         imm |= 0x3e000000;
6425                     } else {
6426                         imm |= 0x40000000;
6427                     }
6428                     imm |= (imm << 32);
6429                 }
6430             }
6431         }
6432         break;
6433     default:
6434         fprintf(stderr, "%s: cmode_3_1: %x\n", __func__, cmode_3_1);
6435         g_assert_not_reached();
6436     }
6437
6438     if (cmode_3_1 != 7 && is_neg) {
6439         imm = ~imm;
6440     }
6441
6442     if (!((cmode & 0x9) == 0x1 || (cmode & 0xd) == 0x9)) {
6443         /* MOVI or MVNI, with MVNI negation handled above.  */
6444         tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), is_q ? 16 : 8,
6445                             vec_full_reg_size(s), imm);
6446     } else {
6447         /* ORR or BIC, with BIC negation to AND handled above.  */
6448         if (is_neg) {
6449             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_andi, MO_64);
6450         } else {
6451             gen_gvec_fn2i(s, is_q, rd, rd, imm, tcg_gen_gvec_ori, MO_64);
6452         }
6453     }
6454 }
6455
6456 /* AdvSIMD scalar copy
6457  *  31 30  29  28             21 20  16 15  14  11 10  9    5 4    0
6458  * +-----+----+-----------------+------+---+------+---+------+------+
6459  * | 0 1 | op | 1 1 1 1 0 0 0 0 | imm5 | 0 | imm4 | 1 |  Rn  |  Rd  |
6460  * +-----+----+-----------------+------+---+------+---+------+------+
6461  */
6462 static void disas_simd_scalar_copy(DisasContext *s, uint32_t insn)
6463 {
6464     int rd = extract32(insn, 0, 5);
6465     int rn = extract32(insn, 5, 5);
6466     int imm4 = extract32(insn, 11, 4);
6467     int imm5 = extract32(insn, 16, 5);
6468     int op = extract32(insn, 29, 1);
6469
6470     if (op != 0 || imm4 != 0) {
6471         unallocated_encoding(s);
6472         return;
6473     }
6474
6475     /* DUP (element, scalar) */
6476     handle_simd_dupes(s, rd, rn, imm5);
6477 }
6478
6479 /* AdvSIMD scalar pairwise
6480  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
6481  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6482  * | 0 1 | U | 1 1 1 1 0 | size | 1 1 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
6483  * +-----+---+-----------+------+-----------+--------+-----+------+------+
6484  */
6485 static void disas_simd_scalar_pairwise(DisasContext *s, uint32_t insn)
6486 {
6487     int u = extract32(insn, 29, 1);
6488     int size = extract32(insn, 22, 2);
6489     int opcode = extract32(insn, 12, 5);
6490     int rn = extract32(insn, 5, 5);
6491     int rd = extract32(insn, 0, 5);
6492     TCGv_ptr fpst;
6493
6494     /* For some ops (the FP ones), size[1] is part of the encoding.
6495      * For ADDP strictly it is not but size[1] is always 1 for valid
6496      * encodings.
6497      */
6498     opcode |= (extract32(size, 1, 1) << 5);
6499
6500     switch (opcode) {
6501     case 0x3b: /* ADDP */
6502         if (u || size != 3) {
6503             unallocated_encoding(s);
6504             return;
6505         }
6506         if (!fp_access_check(s)) {
6507             return;
6508         }
6509
6510         fpst = NULL;
6511         break;
6512     case 0xc: /* FMAXNMP */
6513     case 0xd: /* FADDP */
6514     case 0xf: /* FMAXP */
6515     case 0x2c: /* FMINNMP */
6516     case 0x2f: /* FMINP */
6517         /* FP op, size[0] is 32 or 64 bit*/
6518         if (!u) {
6519             if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
6520                 unallocated_encoding(s);
6521                 return;
6522             } else {
6523                 size = MO_16;
6524             }
6525         } else {
6526             size = extract32(size, 0, 1) ? MO_64 : MO_32;
6527         }
6528
6529         if (!fp_access_check(s)) {
6530             return;
6531         }
6532
6533         fpst = get_fpstatus_ptr(size == MO_16);
6534         break;
6535     default:
6536         unallocated_encoding(s);
6537         return;
6538     }
6539
6540     if (size == MO_64) {
6541         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
6542         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
6543         TCGv_i64 tcg_res = tcg_temp_new_i64();
6544
6545         read_vec_element(s, tcg_op1, rn, 0, MO_64);
6546         read_vec_element(s, tcg_op2, rn, 1, MO_64);
6547
6548         switch (opcode) {
6549         case 0x3b: /* ADDP */
6550             tcg_gen_add_i64(tcg_res, tcg_op1, tcg_op2);
6551             break;
6552         case 0xc: /* FMAXNMP */
6553             gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6554             break;
6555         case 0xd: /* FADDP */
6556             gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
6557             break;
6558         case 0xf: /* FMAXP */
6559             gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
6560             break;
6561         case 0x2c: /* FMINNMP */
6562             gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
6563             break;
6564         case 0x2f: /* FMINP */
6565             gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
6566             break;
6567         default:
6568             g_assert_not_reached();
6569         }
6570
6571         write_fp_dreg(s, rd, tcg_res);
6572
6573         tcg_temp_free_i64(tcg_op1);
6574         tcg_temp_free_i64(tcg_op2);
6575         tcg_temp_free_i64(tcg_res);
6576     } else {
6577         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
6578         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
6579         TCGv_i32 tcg_res = tcg_temp_new_i32();
6580
6581         read_vec_element_i32(s, tcg_op1, rn, 0, size);
6582         read_vec_element_i32(s, tcg_op2, rn, 1, size);
6583
6584         if (size == MO_16) {
6585             switch (opcode) {
6586             case 0xc: /* FMAXNMP */
6587                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6588                 break;
6589             case 0xd: /* FADDP */
6590                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
6591                 break;
6592             case 0xf: /* FMAXP */
6593                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
6594                 break;
6595             case 0x2c: /* FMINNMP */
6596                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
6597                 break;
6598             case 0x2f: /* FMINP */
6599                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
6600                 break;
6601             default:
6602                 g_assert_not_reached();
6603             }
6604         } else {
6605             switch (opcode) {
6606             case 0xc: /* FMAXNMP */
6607                 gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
6608                 break;
6609             case 0xd: /* FADDP */
6610                 gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
6611                 break;
6612             case 0xf: /* FMAXP */
6613                 gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
6614                 break;
6615             case 0x2c: /* FMINNMP */
6616                 gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
6617                 break;
6618             case 0x2f: /* FMINP */
6619                 gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
6620                 break;
6621             default:
6622                 g_assert_not_reached();
6623             }
6624         }
6625
6626         write_fp_sreg(s, rd, tcg_res);
6627
6628         tcg_temp_free_i32(tcg_op1);
6629         tcg_temp_free_i32(tcg_op2);
6630         tcg_temp_free_i32(tcg_res);
6631     }
6632
6633     if (fpst) {
6634         tcg_temp_free_ptr(fpst);
6635     }
6636 }
6637
6638 /*
6639  * Common SSHR[RA]/USHR[RA] - Shift right (optional rounding/accumulate)
6640  *
6641  * This code is handles the common shifting code and is used by both
6642  * the vector and scalar code.
6643  */
6644 static void handle_shri_with_rndacc(TCGv_i64 tcg_res, TCGv_i64 tcg_src,
6645                                     TCGv_i64 tcg_rnd, bool accumulate,
6646                                     bool is_u, int size, int shift)
6647 {
6648     bool extended_result = false;
6649     bool round = tcg_rnd != NULL;
6650     int ext_lshift = 0;
6651     TCGv_i64 tcg_src_hi;
6652
6653     if (round && size == 3) {
6654         extended_result = true;
6655         ext_lshift = 64 - shift;
6656         tcg_src_hi = tcg_temp_new_i64();
6657     } else if (shift == 64) {
6658         if (!accumulate && is_u) {
6659             /* result is zero */
6660             tcg_gen_movi_i64(tcg_res, 0);
6661             return;
6662         }
6663     }
6664
6665     /* Deal with the rounding step */
6666     if (round) {
6667         if (extended_result) {
6668             TCGv_i64 tcg_zero = tcg_const_i64(0);
6669             if (!is_u) {
6670                 /* take care of sign extending tcg_res */
6671                 tcg_gen_sari_i64(tcg_src_hi, tcg_src, 63);
6672                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6673                                  tcg_src, tcg_src_hi,
6674                                  tcg_rnd, tcg_zero);
6675             } else {
6676                 tcg_gen_add2_i64(tcg_src, tcg_src_hi,
6677                                  tcg_src, tcg_zero,
6678                                  tcg_rnd, tcg_zero);
6679             }
6680             tcg_temp_free_i64(tcg_zero);
6681         } else {
6682             tcg_gen_add_i64(tcg_src, tcg_src, tcg_rnd);
6683         }
6684     }
6685
6686     /* Now do the shift right */
6687     if (round && extended_result) {
6688         /* extended case, >64 bit precision required */
6689         if (ext_lshift == 0) {
6690             /* special case, only high bits matter */
6691             tcg_gen_mov_i64(tcg_src, tcg_src_hi);
6692         } else {
6693             tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6694             tcg_gen_shli_i64(tcg_src_hi, tcg_src_hi, ext_lshift);
6695             tcg_gen_or_i64(tcg_src, tcg_src, tcg_src_hi);
6696         }
6697     } else {
6698         if (is_u) {
6699             if (shift == 64) {
6700                 /* essentially shifting in 64 zeros */
6701                 tcg_gen_movi_i64(tcg_src, 0);
6702             } else {
6703                 tcg_gen_shri_i64(tcg_src, tcg_src, shift);
6704             }
6705         } else {
6706             if (shift == 64) {
6707                 /* effectively extending the sign-bit */
6708                 tcg_gen_sari_i64(tcg_src, tcg_src, 63);
6709             } else {
6710                 tcg_gen_sari_i64(tcg_src, tcg_src, shift);
6711             }
6712         }
6713     }
6714
6715     if (accumulate) {
6716         tcg_gen_add_i64(tcg_res, tcg_res, tcg_src);
6717     } else {
6718         tcg_gen_mov_i64(tcg_res, tcg_src);
6719     }
6720
6721     if (extended_result) {
6722         tcg_temp_free_i64(tcg_src_hi);
6723     }
6724 }
6725
6726 /* SSHR[RA]/USHR[RA] - Scalar shift right (optional rounding/accumulate) */
6727 static void handle_scalar_simd_shri(DisasContext *s,
6728                                     bool is_u, int immh, int immb,
6729                                     int opcode, int rn, int rd)
6730 {
6731     const int size = 3;
6732     int immhb = immh << 3 | immb;
6733     int shift = 2 * (8 << size) - immhb;
6734     bool accumulate = false;
6735     bool round = false;
6736     bool insert = false;
6737     TCGv_i64 tcg_rn;
6738     TCGv_i64 tcg_rd;
6739     TCGv_i64 tcg_round;
6740
6741     if (!extract32(immh, 3, 1)) {
6742         unallocated_encoding(s);
6743         return;
6744     }
6745
6746     if (!fp_access_check(s)) {
6747         return;
6748     }
6749
6750     switch (opcode) {
6751     case 0x02: /* SSRA / USRA (accumulate) */
6752         accumulate = true;
6753         break;
6754     case 0x04: /* SRSHR / URSHR (rounding) */
6755         round = true;
6756         break;
6757     case 0x06: /* SRSRA / URSRA (accum + rounding) */
6758         accumulate = round = true;
6759         break;
6760     case 0x08: /* SRI */
6761         insert = true;
6762         break;
6763     }
6764
6765     if (round) {
6766         uint64_t round_const = 1ULL << (shift - 1);
6767         tcg_round = tcg_const_i64(round_const);
6768     } else {
6769         tcg_round = NULL;
6770     }
6771
6772     tcg_rn = read_fp_dreg(s, rn);
6773     tcg_rd = (accumulate || insert) ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6774
6775     if (insert) {
6776         /* shift count same as element size is valid but does nothing;
6777          * special case to avoid potential shift by 64.
6778          */
6779         int esize = 8 << size;
6780         if (shift != esize) {
6781             tcg_gen_shri_i64(tcg_rn, tcg_rn, shift);
6782             tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, 0, esize - shift);
6783         }
6784     } else {
6785         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6786                                 accumulate, is_u, size, shift);
6787     }
6788
6789     write_fp_dreg(s, rd, tcg_rd);
6790
6791     tcg_temp_free_i64(tcg_rn);
6792     tcg_temp_free_i64(tcg_rd);
6793     if (round) {
6794         tcg_temp_free_i64(tcg_round);
6795     }
6796 }
6797
6798 /* SHL/SLI - Scalar shift left */
6799 static void handle_scalar_simd_shli(DisasContext *s, bool insert,
6800                                     int immh, int immb, int opcode,
6801                                     int rn, int rd)
6802 {
6803     int size = 32 - clz32(immh) - 1;
6804     int immhb = immh << 3 | immb;
6805     int shift = immhb - (8 << size);
6806     TCGv_i64 tcg_rn = new_tmp_a64(s);
6807     TCGv_i64 tcg_rd = new_tmp_a64(s);
6808
6809     if (!extract32(immh, 3, 1)) {
6810         unallocated_encoding(s);
6811         return;
6812     }
6813
6814     if (!fp_access_check(s)) {
6815         return;
6816     }
6817
6818     tcg_rn = read_fp_dreg(s, rn);
6819     tcg_rd = insert ? read_fp_dreg(s, rd) : tcg_temp_new_i64();
6820
6821     if (insert) {
6822         tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, shift, 64 - shift);
6823     } else {
6824         tcg_gen_shli_i64(tcg_rd, tcg_rn, shift);
6825     }
6826
6827     write_fp_dreg(s, rd, tcg_rd);
6828
6829     tcg_temp_free_i64(tcg_rn);
6830     tcg_temp_free_i64(tcg_rd);
6831 }
6832
6833 /* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
6834  * (signed/unsigned) narrowing */
6835 static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
6836                                    bool is_u_shift, bool is_u_narrow,
6837                                    int immh, int immb, int opcode,
6838                                    int rn, int rd)
6839 {
6840     int immhb = immh << 3 | immb;
6841     int size = 32 - clz32(immh) - 1;
6842     int esize = 8 << size;
6843     int shift = (2 * esize) - immhb;
6844     int elements = is_scalar ? 1 : (64 / esize);
6845     bool round = extract32(opcode, 0, 1);
6846     TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
6847     TCGv_i64 tcg_rn, tcg_rd, tcg_round;
6848     TCGv_i32 tcg_rd_narrowed;
6849     TCGv_i64 tcg_final;
6850
6851     static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
6852         { gen_helper_neon_narrow_sat_s8,
6853           gen_helper_neon_unarrow_sat8 },
6854         { gen_helper_neon_narrow_sat_s16,
6855           gen_helper_neon_unarrow_sat16 },
6856         { gen_helper_neon_narrow_sat_s32,
6857           gen_helper_neon_unarrow_sat32 },
6858         { NULL, NULL },
6859     };
6860     static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
6861         gen_helper_neon_narrow_sat_u8,
6862         gen_helper_neon_narrow_sat_u16,
6863         gen_helper_neon_narrow_sat_u32,
6864         NULL
6865     };
6866     NeonGenNarrowEnvFn *narrowfn;
6867
6868     int i;
6869
6870     assert(size < 4);
6871
6872     if (extract32(immh, 3, 1)) {
6873         unallocated_encoding(s);
6874         return;
6875     }
6876
6877     if (!fp_access_check(s)) {
6878         return;
6879     }
6880
6881     if (is_u_shift) {
6882         narrowfn = unsigned_narrow_fns[size];
6883     } else {
6884         narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
6885     }
6886
6887     tcg_rn = tcg_temp_new_i64();
6888     tcg_rd = tcg_temp_new_i64();
6889     tcg_rd_narrowed = tcg_temp_new_i32();
6890     tcg_final = tcg_const_i64(0);
6891
6892     if (round) {
6893         uint64_t round_const = 1ULL << (shift - 1);
6894         tcg_round = tcg_const_i64(round_const);
6895     } else {
6896         tcg_round = NULL;
6897     }
6898
6899     for (i = 0; i < elements; i++) {
6900         read_vec_element(s, tcg_rn, rn, i, ldop);
6901         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
6902                                 false, is_u_shift, size+1, shift);
6903         narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
6904         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
6905         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
6906     }
6907
6908     if (!is_q) {
6909         write_vec_element(s, tcg_final, rd, 0, MO_64);
6910     } else {
6911         write_vec_element(s, tcg_final, rd, 1, MO_64);
6912     }
6913
6914     if (round) {
6915         tcg_temp_free_i64(tcg_round);
6916     }
6917     tcg_temp_free_i64(tcg_rn);
6918     tcg_temp_free_i64(tcg_rd);
6919     tcg_temp_free_i32(tcg_rd_narrowed);
6920     tcg_temp_free_i64(tcg_final);
6921
6922     clear_vec_high(s, is_q, rd);
6923 }
6924
6925 /* SQSHLU, UQSHL, SQSHL: saturating left shifts */
6926 static void handle_simd_qshl(DisasContext *s, bool scalar, bool is_q,
6927                              bool src_unsigned, bool dst_unsigned,
6928                              int immh, int immb, int rn, int rd)
6929 {
6930     int immhb = immh << 3 | immb;
6931     int size = 32 - clz32(immh) - 1;
6932     int shift = immhb - (8 << size);
6933     int pass;
6934
6935     assert(immh != 0);
6936     assert(!(scalar && is_q));
6937
6938     if (!scalar) {
6939         if (!is_q && extract32(immh, 3, 1)) {
6940             unallocated_encoding(s);
6941             return;
6942         }
6943
6944         /* Since we use the variable-shift helpers we must
6945          * replicate the shift count into each element of
6946          * the tcg_shift value.
6947          */
6948         switch (size) {
6949         case 0:
6950             shift |= shift << 8;
6951             /* fall through */
6952         case 1:
6953             shift |= shift << 16;
6954             break;
6955         case 2:
6956         case 3:
6957             break;
6958         default:
6959             g_assert_not_reached();
6960         }
6961     }
6962
6963     if (!fp_access_check(s)) {
6964         return;
6965     }
6966
6967     if (size == 3) {
6968         TCGv_i64 tcg_shift = tcg_const_i64(shift);
6969         static NeonGenTwo64OpEnvFn * const fns[2][2] = {
6970             { gen_helper_neon_qshl_s64, gen_helper_neon_qshlu_s64 },
6971             { NULL, gen_helper_neon_qshl_u64 },
6972         };
6973         NeonGenTwo64OpEnvFn *genfn = fns[src_unsigned][dst_unsigned];
6974         int maxpass = is_q ? 2 : 1;
6975
6976         for (pass = 0; pass < maxpass; pass++) {
6977             TCGv_i64 tcg_op = tcg_temp_new_i64();
6978
6979             read_vec_element(s, tcg_op, rn, pass, MO_64);
6980             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
6981             write_vec_element(s, tcg_op, rd, pass, MO_64);
6982
6983             tcg_temp_free_i64(tcg_op);
6984         }
6985         tcg_temp_free_i64(tcg_shift);
6986         clear_vec_high(s, is_q, rd);
6987     } else {
6988         TCGv_i32 tcg_shift = tcg_const_i32(shift);
6989         static NeonGenTwoOpEnvFn * const fns[2][2][3] = {
6990             {
6991                 { gen_helper_neon_qshl_s8,
6992                   gen_helper_neon_qshl_s16,
6993                   gen_helper_neon_qshl_s32 },
6994                 { gen_helper_neon_qshlu_s8,
6995                   gen_helper_neon_qshlu_s16,
6996                   gen_helper_neon_qshlu_s32 }
6997             }, {
6998                 { NULL, NULL, NULL },
6999                 { gen_helper_neon_qshl_u8,
7000                   gen_helper_neon_qshl_u16,
7001                   gen_helper_neon_qshl_u32 }
7002             }
7003         };
7004         NeonGenTwoOpEnvFn *genfn = fns[src_unsigned][dst_unsigned][size];
7005         TCGMemOp memop = scalar ? size : MO_32;
7006         int maxpass = scalar ? 1 : is_q ? 4 : 2;
7007
7008         for (pass = 0; pass < maxpass; pass++) {
7009             TCGv_i32 tcg_op = tcg_temp_new_i32();
7010
7011             read_vec_element_i32(s, tcg_op, rn, pass, memop);
7012             genfn(tcg_op, cpu_env, tcg_op, tcg_shift);
7013             if (scalar) {
7014                 switch (size) {
7015                 case 0:
7016                     tcg_gen_ext8u_i32(tcg_op, tcg_op);
7017                     break;
7018                 case 1:
7019                     tcg_gen_ext16u_i32(tcg_op, tcg_op);
7020                     break;
7021                 case 2:
7022                     break;
7023                 default:
7024                     g_assert_not_reached();
7025                 }
7026                 write_fp_sreg(s, rd, tcg_op);
7027             } else {
7028                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
7029             }
7030
7031             tcg_temp_free_i32(tcg_op);
7032         }
7033         tcg_temp_free_i32(tcg_shift);
7034
7035         if (!scalar) {
7036             clear_vec_high(s, is_q, rd);
7037         }
7038     }
7039 }
7040
7041 /* Common vector code for handling integer to FP conversion */
7042 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
7043                                    int elements, int is_signed,
7044                                    int fracbits, int size)
7045 {
7046     TCGv_ptr tcg_fpst = get_fpstatus_ptr(size == MO_16);
7047     TCGv_i32 tcg_shift = NULL;
7048
7049     TCGMemOp mop = size | (is_signed ? MO_SIGN : 0);
7050     int pass;
7051
7052     if (fracbits || size == MO_64) {
7053         tcg_shift = tcg_const_i32(fracbits);
7054     }
7055
7056     if (size == MO_64) {
7057         TCGv_i64 tcg_int64 = tcg_temp_new_i64();
7058         TCGv_i64 tcg_double = tcg_temp_new_i64();
7059
7060         for (pass = 0; pass < elements; pass++) {
7061             read_vec_element(s, tcg_int64, rn, pass, mop);
7062
7063             if (is_signed) {
7064                 gen_helper_vfp_sqtod(tcg_double, tcg_int64,
7065                                      tcg_shift, tcg_fpst);
7066             } else {
7067                 gen_helper_vfp_uqtod(tcg_double, tcg_int64,
7068                                      tcg_shift, tcg_fpst);
7069             }
7070             if (elements == 1) {
7071                 write_fp_dreg(s, rd, tcg_double);
7072             } else {
7073                 write_vec_element(s, tcg_double, rd, pass, MO_64);
7074             }
7075         }
7076
7077         tcg_temp_free_i64(tcg_int64);
7078         tcg_temp_free_i64(tcg_double);
7079
7080     } else {
7081         TCGv_i32 tcg_int32 = tcg_temp_new_i32();
7082         TCGv_i32 tcg_float = tcg_temp_new_i32();
7083
7084         for (pass = 0; pass < elements; pass++) {
7085             read_vec_element_i32(s, tcg_int32, rn, pass, mop);
7086
7087             switch (size) {
7088             case MO_32:
7089                 if (fracbits) {
7090                     if (is_signed) {
7091                         gen_helper_vfp_sltos(tcg_float, tcg_int32,
7092                                              tcg_shift, tcg_fpst);
7093                     } else {
7094                         gen_helper_vfp_ultos(tcg_float, tcg_int32,
7095                                              tcg_shift, tcg_fpst);
7096                     }
7097                 } else {
7098                     if (is_signed) {
7099                         gen_helper_vfp_sitos(tcg_float, tcg_int32, tcg_fpst);
7100                     } else {
7101                         gen_helper_vfp_uitos(tcg_float, tcg_int32, tcg_fpst);
7102                     }
7103                 }
7104                 break;
7105             case MO_16:
7106                 if (fracbits) {
7107                     if (is_signed) {
7108                         gen_helper_vfp_sltoh(tcg_float, tcg_int32,
7109                                              tcg_shift, tcg_fpst);
7110                     } else {
7111                         gen_helper_vfp_ultoh(tcg_float, tcg_int32,
7112                                              tcg_shift, tcg_fpst);
7113                     }
7114                 } else {
7115                     if (is_signed) {
7116                         gen_helper_vfp_sitoh(tcg_float, tcg_int32, tcg_fpst);
7117                     } else {
7118                         gen_helper_vfp_uitoh(tcg_float, tcg_int32, tcg_fpst);
7119                     }
7120                 }
7121                 break;
7122             default:
7123                 g_assert_not_reached();
7124             }
7125
7126             if (elements == 1) {
7127                 write_fp_sreg(s, rd, tcg_float);
7128             } else {
7129                 write_vec_element_i32(s, tcg_float, rd, pass, size);
7130             }
7131         }
7132
7133         tcg_temp_free_i32(tcg_int32);
7134         tcg_temp_free_i32(tcg_float);
7135     }
7136
7137     tcg_temp_free_ptr(tcg_fpst);
7138     if (tcg_shift) {
7139         tcg_temp_free_i32(tcg_shift);
7140     }
7141
7142     clear_vec_high(s, elements << size == 16, rd);
7143 }
7144
7145 /* UCVTF/SCVTF - Integer to FP conversion */
7146 static void handle_simd_shift_intfp_conv(DisasContext *s, bool is_scalar,
7147                                          bool is_q, bool is_u,
7148                                          int immh, int immb, int opcode,
7149                                          int rn, int rd)
7150 {
7151     bool is_double = extract32(immh, 3, 1);
7152     int size = is_double ? MO_64 : MO_32;
7153     int elements;
7154     int immhb = immh << 3 | immb;
7155     int fracbits = (is_double ? 128 : 64) - immhb;
7156
7157     if (!extract32(immh, 2, 2)) {
7158         unallocated_encoding(s);
7159         return;
7160     }
7161
7162     if (is_scalar) {
7163         elements = 1;
7164     } else {
7165         elements = is_double ? 2 : is_q ? 4 : 2;
7166         if (is_double && !is_q) {
7167             unallocated_encoding(s);
7168             return;
7169         }
7170     }
7171
7172     if (!fp_access_check(s)) {
7173         return;
7174     }
7175
7176     /* immh == 0 would be a failure of the decode logic */
7177     g_assert(immh);
7178
7179     handle_simd_intfp_conv(s, rd, rn, elements, !is_u, fracbits, size);
7180 }
7181
7182 /* FCVTZS, FVCVTZU - FP to fixedpoint conversion */
7183 static void handle_simd_shift_fpint_conv(DisasContext *s, bool is_scalar,
7184                                          bool is_q, bool is_u,
7185                                          int immh, int immb, int rn, int rd)
7186 {
7187     bool is_double = extract32(immh, 3, 1);
7188     int immhb = immh << 3 | immb;
7189     int fracbits = (is_double ? 128 : 64) - immhb;
7190     int pass;
7191     TCGv_ptr tcg_fpstatus;
7192     TCGv_i32 tcg_rmode, tcg_shift;
7193
7194     if (!extract32(immh, 2, 2)) {
7195         unallocated_encoding(s);
7196         return;
7197     }
7198
7199     if (!is_scalar && !is_q && is_double) {
7200         unallocated_encoding(s);
7201         return;
7202     }
7203
7204     if (!fp_access_check(s)) {
7205         return;
7206     }
7207
7208     assert(!(is_scalar && is_q));
7209
7210     tcg_rmode = tcg_const_i32(arm_rmode_to_sf(FPROUNDING_ZERO));
7211     tcg_fpstatus = get_fpstatus_ptr(false);
7212     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7213     tcg_shift = tcg_const_i32(fracbits);
7214
7215     if (is_double) {
7216         int maxpass = is_scalar ? 1 : 2;
7217
7218         for (pass = 0; pass < maxpass; pass++) {
7219             TCGv_i64 tcg_op = tcg_temp_new_i64();
7220
7221             read_vec_element(s, tcg_op, rn, pass, MO_64);
7222             if (is_u) {
7223                 gen_helper_vfp_touqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7224             } else {
7225                 gen_helper_vfp_tosqd(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7226             }
7227             write_vec_element(s, tcg_op, rd, pass, MO_64);
7228             tcg_temp_free_i64(tcg_op);
7229         }
7230         clear_vec_high(s, is_q, rd);
7231     } else {
7232         int maxpass = is_scalar ? 1 : is_q ? 4 : 2;
7233         for (pass = 0; pass < maxpass; pass++) {
7234             TCGv_i32 tcg_op = tcg_temp_new_i32();
7235
7236             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
7237             if (is_u) {
7238                 gen_helper_vfp_touls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7239             } else {
7240                 gen_helper_vfp_tosls(tcg_op, tcg_op, tcg_shift, tcg_fpstatus);
7241             }
7242             if (is_scalar) {
7243                 write_fp_sreg(s, rd, tcg_op);
7244             } else {
7245                 write_vec_element_i32(s, tcg_op, rd, pass, MO_32);
7246             }
7247             tcg_temp_free_i32(tcg_op);
7248         }
7249         if (!is_scalar) {
7250             clear_vec_high(s, is_q, rd);
7251         }
7252     }
7253
7254     tcg_temp_free_ptr(tcg_fpstatus);
7255     tcg_temp_free_i32(tcg_shift);
7256     gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
7257     tcg_temp_free_i32(tcg_rmode);
7258 }
7259
7260 /* AdvSIMD scalar shift by immediate
7261  *  31 30  29 28         23 22  19 18  16 15    11  10 9    5 4    0
7262  * +-----+---+-------------+------+------+--------+---+------+------+
7263  * | 0 1 | U | 1 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
7264  * +-----+---+-------------+------+------+--------+---+------+------+
7265  *
7266  * This is the scalar version so it works on a fixed sized registers
7267  */
7268 static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
7269 {
7270     int rd = extract32(insn, 0, 5);
7271     int rn = extract32(insn, 5, 5);
7272     int opcode = extract32(insn, 11, 5);
7273     int immb = extract32(insn, 16, 3);
7274     int immh = extract32(insn, 19, 4);
7275     bool is_u = extract32(insn, 29, 1);
7276
7277     if (immh == 0) {
7278         unallocated_encoding(s);
7279         return;
7280     }
7281
7282     switch (opcode) {
7283     case 0x08: /* SRI */
7284         if (!is_u) {
7285             unallocated_encoding(s);
7286             return;
7287         }
7288         /* fall through */
7289     case 0x00: /* SSHR / USHR */
7290     case 0x02: /* SSRA / USRA */
7291     case 0x04: /* SRSHR / URSHR */
7292     case 0x06: /* SRSRA / URSRA */
7293         handle_scalar_simd_shri(s, is_u, immh, immb, opcode, rn, rd);
7294         break;
7295     case 0x0a: /* SHL / SLI */
7296         handle_scalar_simd_shli(s, is_u, immh, immb, opcode, rn, rd);
7297         break;
7298     case 0x1c: /* SCVTF, UCVTF */
7299         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
7300                                      opcode, rn, rd);
7301         break;
7302     case 0x10: /* SQSHRUN, SQSHRUN2 */
7303     case 0x11: /* SQRSHRUN, SQRSHRUN2 */
7304         if (!is_u) {
7305             unallocated_encoding(s);
7306             return;
7307         }
7308         handle_vec_simd_sqshrn(s, true, false, false, true,
7309                                immh, immb, opcode, rn, rd);
7310         break;
7311     case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
7312     case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
7313         handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
7314                                immh, immb, opcode, rn, rd);
7315         break;
7316     case 0xc: /* SQSHLU */
7317         if (!is_u) {
7318             unallocated_encoding(s);
7319             return;
7320         }
7321         handle_simd_qshl(s, true, false, false, true, immh, immb, rn, rd);
7322         break;
7323     case 0xe: /* SQSHL, UQSHL */
7324         handle_simd_qshl(s, true, false, is_u, is_u, immh, immb, rn, rd);
7325         break;
7326     case 0x1f: /* FCVTZS, FCVTZU */
7327         handle_simd_shift_fpint_conv(s, true, false, is_u, immh, immb, rn, rd);
7328         break;
7329     default:
7330         unallocated_encoding(s);
7331         break;
7332     }
7333 }
7334
7335 /* AdvSIMD scalar three different
7336  *  31 30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
7337  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7338  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
7339  * +-----+---+-----------+------+---+------+--------+-----+------+------+
7340  */
7341 static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
7342 {
7343     bool is_u = extract32(insn, 29, 1);
7344     int size = extract32(insn, 22, 2);
7345     int opcode = extract32(insn, 12, 4);
7346     int rm = extract32(insn, 16, 5);
7347     int rn = extract32(insn, 5, 5);
7348     int rd = extract32(insn, 0, 5);
7349
7350     if (is_u) {
7351         unallocated_encoding(s);
7352         return;
7353     }
7354
7355     switch (opcode) {
7356     case 0x9: /* SQDMLAL, SQDMLAL2 */
7357     case 0xb: /* SQDMLSL, SQDMLSL2 */
7358     case 0xd: /* SQDMULL, SQDMULL2 */
7359         if (size == 0 || size == 3) {
7360             unallocated_encoding(s);
7361             return;
7362         }
7363         break;
7364     default:
7365         unallocated_encoding(s);
7366         return;
7367     }
7368
7369     if (!fp_access_check(s)) {
7370         return;
7371     }
7372
7373     if (size == 2) {
7374         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
7375         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
7376         TCGv_i64 tcg_res = tcg_temp_new_i64();
7377
7378         read_vec_element(s, tcg_op1, rn, 0, MO_32 | MO_SIGN);
7379         read_vec_element(s, tcg_op2, rm, 0, MO_32 | MO_SIGN);
7380
7381         tcg_gen_mul_i64(tcg_res, tcg_op1, tcg_op2);
7382         gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env, tcg_res, tcg_res);
7383
7384         switch (opcode) {
7385         case 0xd: /* SQDMULL, SQDMULL2 */
7386             break;
7387         case 0xb: /* SQDMLSL, SQDMLSL2 */
7388             tcg_gen_neg_i64(tcg_res, tcg_res);
7389             /* fall through */
7390         case 0x9: /* SQDMLAL, SQDMLAL2 */
7391             read_vec_element(s, tcg_op1, rd, 0, MO_64);
7392             gen_helper_neon_addl_saturate_s64(tcg_res, cpu_env,
7393                                               tcg_res, tcg_op1);
7394             break;
7395         default:
7396             g_assert_not_reached();
7397         }
7398
7399         write_fp_dreg(s, rd, tcg_res);
7400
7401         tcg_temp_free_i64(tcg_op1);
7402         tcg_temp_free_i64(tcg_op2);
7403         tcg_temp_free_i64(tcg_res);
7404     } else {
7405         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
7406         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
7407         TCGv_i64 tcg_res = tcg_temp_new_i64();
7408
7409         read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7410         read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7411
7412         gen_helper_neon_mull_s16(tcg_res, tcg_op1, tcg_op2);
7413         gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env, tcg_res, tcg_res);
7414
7415         switch (opcode) {
7416         case 0xd: /* SQDMULL, SQDMULL2 */
7417             break;
7418         case 0xb: /* SQDMLSL, SQDMLSL2 */
7419             gen_helper_neon_negl_u32(tcg_res, tcg_res);
7420             /* fall through */
7421         case 0x9: /* SQDMLAL, SQDMLAL2 */
7422         {
7423             TCGv_i64 tcg_op3 = tcg_temp_new_i64();
7424             read_vec_element(s, tcg_op3, rd, 0, MO_32);
7425             gen_helper_neon_addl_saturate_s32(tcg_res, cpu_env,
7426                                               tcg_res, tcg_op3);
7427             tcg_temp_free_i64(tcg_op3);
7428             break;
7429         }
7430         default:
7431             g_assert_not_reached();
7432         }
7433
7434         tcg_gen_ext32u_i64(tcg_res, tcg_res);
7435         write_fp_dreg(s, rd, tcg_res);
7436
7437         tcg_temp_free_i32(tcg_op1);
7438         tcg_temp_free_i32(tcg_op2);
7439         tcg_temp_free_i64(tcg_res);
7440     }
7441 }
7442
/* CMTST : test is "if ((X & Y) != 0)".
 *
 * 32-bit version: d receives the negated result of the "not equal to
 * zero" setcond on (a & b), i.e. the same setcond-then-negate scheme
 * used for the integer compares in this file to build a mask.
 */
static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_and_i32(d, a, b);
    /* d = ((a & b) != 0), then negate to turn the flag into a mask */
    tcg_gen_setcondi_i32(TCG_COND_NE, d, d, 0);
    tcg_gen_neg_i32(d, d);
}
7450
/* CMTST, 64-bit version: d = -((a & b) != 0), built as an AND followed
 * by a setcond against zero and a negation.
 */
static void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_and_i64(d, a, b);
    /* d = ((a & b) != 0), then negate to turn the flag into a mask */
    tcg_gen_setcondi_i64(TCG_COND_NE, d, d, 0);
    tcg_gen_neg_i64(d, d);
}
7457
/* CMTST, host-vector version: per element of size vece, compare
 * (a & b) against zero with TCG_COND_NE and leave the result in d.
 *
 * NOTE: operand a is overwritten with zeros to serve as the comparison
 * value, so its original contents are lost after this call.
 * No negation follows the compare here; presumably tcg_gen_cmp_vec
 * already yields the per-element mask -- confirm against the TCG
 * vector op documentation.
 */
static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_and_vec(vece, d, a, b);
    /* Reuse a as the all-zeros comparison operand (clobbers the input) */
    tcg_gen_dupi_vec(vece, a, 0);
    tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
}
7464
7465 static void handle_3same_64(DisasContext *s, int opcode, bool u,
7466                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
7467 {
7468     /* Handle 64x64->64 opcodes which are shared between the scalar
7469      * and vector 3-same groups. We cover every opcode where size == 3
7470      * is valid in either the three-reg-same (integer, not pairwise)
7471      * or scalar-three-reg-same groups.
7472      */
7473     TCGCond cond;
7474
7475     switch (opcode) {
7476     case 0x1: /* SQADD */
7477         if (u) {
7478             gen_helper_neon_qadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7479         } else {
7480             gen_helper_neon_qadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7481         }
7482         break;
7483     case 0x5: /* SQSUB */
7484         if (u) {
7485             gen_helper_neon_qsub_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7486         } else {
7487             gen_helper_neon_qsub_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7488         }
7489         break;
7490     case 0x6: /* CMGT, CMHI */
7491         /* 64 bit integer comparison, result = test ? (2^64 - 1) : 0.
7492          * We implement this using setcond (test) and then negating.
7493          */
7494         cond = u ? TCG_COND_GTU : TCG_COND_GT;
7495     do_cmop:
7496         tcg_gen_setcond_i64(cond, tcg_rd, tcg_rn, tcg_rm);
7497         tcg_gen_neg_i64(tcg_rd, tcg_rd);
7498         break;
7499     case 0x7: /* CMGE, CMHS */
7500         cond = u ? TCG_COND_GEU : TCG_COND_GE;
7501         goto do_cmop;
7502     case 0x11: /* CMTST, CMEQ */
7503         if (u) {
7504             cond = TCG_COND_EQ;
7505             goto do_cmop;
7506         }
7507         gen_cmtst_i64(tcg_rd, tcg_rn, tcg_rm);
7508         break;
7509     case 0x8: /* SSHL, USHL */
7510         if (u) {
7511             gen_helper_neon_shl_u64(tcg_rd, tcg_rn, tcg_rm);
7512         } else {
7513             gen_helper_neon_shl_s64(tcg_rd, tcg_rn, tcg_rm);
7514         }
7515         break;
7516     case 0x9: /* SQSHL, UQSHL */
7517         if (u) {
7518             gen_helper_neon_qshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7519         } else {
7520             gen_helper_neon_qshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7521         }
7522         break;
7523     case 0xa: /* SRSHL, URSHL */
7524         if (u) {
7525             gen_helper_neon_rshl_u64(tcg_rd, tcg_rn, tcg_rm);
7526         } else {
7527             gen_helper_neon_rshl_s64(tcg_rd, tcg_rn, tcg_rm);
7528         }
7529         break;
7530     case 0xb: /* SQRSHL, UQRSHL */
7531         if (u) {
7532             gen_helper_neon_qrshl_u64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7533         } else {
7534             gen_helper_neon_qrshl_s64(tcg_rd, cpu_env, tcg_rn, tcg_rm);
7535         }
7536         break;
7537     case 0x10: /* ADD, SUB */
7538         if (u) {
7539             tcg_gen_sub_i64(tcg_rd, tcg_rn, tcg_rm);
7540         } else {
7541             tcg_gen_add_i64(tcg_rd, tcg_rn, tcg_rm);
7542         }
7543         break;
7544     default:
7545         g_assert_not_reached();
7546     }
7547 }
7548
/* Handle the 3-same-operands float operations; shared by the scalar
 * and vector encodings. The caller must filter out any encodings
 * not allocated for the encoding it is dealing with.
 *
 * @size:     non-zero selects the double-precision (64-bit element)
 *            path, zero the single-precision (32-bit element) path
 * @elements: number of elements to process; 1 is treated as the scalar
 *            case when writing single-precision results
 * @fpopcode: combined opcode value built by the caller; an opcode not
 *            listed in the switches below asserts
 * @rd, @rn, @rm: destination and source vector register numbers
 *
 * For each element the operands are read from Rn and Rm, the selected
 * helper is called, and the result is written to the same element of
 * Rd. FMLA/FMLS additionally read the existing Rd element as the
 * addend.
 */
static void handle_3same_float(DisasContext *s, int size, int elements,
                               int fpopcode, int rd, int rn, int rm)
{
    int pass;
    TCGv_ptr fpst = get_fpstatus_ptr(false);

    for (pass = 0; pass < elements; pass++) {
        if (size) {
            /* Double */
            TCGv_i64 tcg_op1 = tcg_temp_new_i64();
            TCGv_i64 tcg_op2 = tcg_temp_new_i64();
            TCGv_i64 tcg_res = tcg_temp_new_i64();

            read_vec_element(s, tcg_op1, rn, pass, MO_64);
            read_vec_element(s, tcg_op2, rm, pass, MO_64);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negd(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                /* The current Rd element is the addend */
                read_vec_element(s, tcg_res, rd, pass, MO_64);
                gen_helper_vfp_muladdd(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_addd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnumd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mind(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muld(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divd(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* Absolute difference: subtract, then take abs */
                gen_helper_vfp_subd(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_absd(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f64(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            write_vec_element(s, tcg_res, rd, pass, MO_64);

            tcg_temp_free_i64(tcg_res);
            tcg_temp_free_i64(tcg_op1);
            tcg_temp_free_i64(tcg_op2);
        } else {
            /* Single */
            TCGv_i32 tcg_op1 = tcg_temp_new_i32();
            TCGv_i32 tcg_op2 = tcg_temp_new_i32();
            TCGv_i32 tcg_res = tcg_temp_new_i32();

            read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
            read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);

            switch (fpopcode) {
            case 0x39: /* FMLS */
                /* As usual for ARM, separate negation for fused multiply-add */
                gen_helper_vfp_negs(tcg_op1, tcg_op1);
                /* fall through */
            case 0x19: /* FMLA */
                /* The current Rd element is the addend */
                read_vec_element_i32(s, tcg_res, rd, pass, MO_32);
                gen_helper_vfp_muladds(tcg_res, tcg_op1, tcg_op2,
                                       tcg_res, fpst);
                break;
            case 0x1a: /* FADD */
                gen_helper_vfp_adds(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1b: /* FMULX */
                gen_helper_vfp_mulxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1c: /* FCMEQ */
                gen_helper_neon_ceq_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1e: /* FMAX */
                gen_helper_vfp_maxs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x1f: /* FRECPS */
                gen_helper_recpsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x18: /* FMAXNM */
                gen_helper_vfp_maxnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x38: /* FMINNM */
                gen_helper_vfp_minnums(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3a: /* FSUB */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3e: /* FMIN */
                gen_helper_vfp_mins(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x3f: /* FRSQRTS */
                gen_helper_rsqrtsf_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5b: /* FMUL */
                gen_helper_vfp_muls(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5c: /* FCMGE */
                gen_helper_neon_cge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5d: /* FACGE */
                gen_helper_neon_acge_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x5f: /* FDIV */
                gen_helper_vfp_divs(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7a: /* FABD */
                /* Absolute difference: subtract, then take abs */
                gen_helper_vfp_subs(tcg_res, tcg_op1, tcg_op2, fpst);
                gen_helper_vfp_abss(tcg_res, tcg_res);
                break;
            case 0x7c: /* FCMGT */
                gen_helper_neon_cgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            case 0x7d: /* FACGT */
                gen_helper_neon_acgt_f32(tcg_res, tcg_op1, tcg_op2, fpst);
                break;
            default:
                g_assert_not_reached();
            }

            if (elements == 1) {
                /* scalar single so clear high part */
                TCGv_i64 tcg_tmp = tcg_temp_new_i64();

                /* Zero-extend to 64 bits and store as one 64-bit element */
                tcg_gen_extu_i32_i64(tcg_tmp, tcg_res);
                write_vec_element(s, tcg_tmp, rd, pass, MO_64);
                tcg_temp_free_i64(tcg_tmp);
            } else {
                write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
            }

            tcg_temp_free_i32(tcg_res);
            tcg_temp_free_i32(tcg_op1);
            tcg_temp_free_i32(tcg_op2);
        }
    }

    tcg_temp_free_ptr(fpst);

    /* The flag is true when more than 8 bytes of result were written;
     * presumably this tells clear_vec_high() whether the whole 128-bit
     * register is in use -- confirm against its definition.
     */
    clear_vec_high(s, elements * (size ? 8 : 4) > 8, rd);
}
7736
7737 /* AdvSIMD scalar three same
7738  *  31 30  29 28       24 23  22  21 20  16 15    11  10 9    5 4    0
7739  * +-----+---+-----------+------+---+------+--------+---+------+------+
7740  * | 0 1 | U | 1 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
7741  * +-----+---+-----------+------+---+------+--------+---+------+------+
7742  */
7743 static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
7744 {
7745     int rd = extract32(insn, 0, 5);
7746     int rn = extract32(insn, 5, 5);
7747     int opcode = extract32(insn, 11, 5);
7748     int rm = extract32(insn, 16, 5);
7749     int size = extract32(insn, 22, 2);
7750     bool u = extract32(insn, 29, 1);
7751     TCGv_i64 tcg_rd;
7752
7753     if (opcode >= 0x18) {
7754         /* Floating point: U, size[1] and opcode indicate operation */
7755         int fpopcode = opcode | (extract32(size, 1, 1) << 5) | (u << 6);
7756         switch (fpopcode) {
7757         case 0x1b: /* FMULX */
7758         case 0x1f: /* FRECPS */
7759         case 0x3f: /* FRSQRTS */
7760         case 0x5d: /* FACGE */
7761         case 0x7d: /* FACGT */
7762         case 0x1c: /* FCMEQ */
7763         case 0x5c: /* FCMGE */
7764         case 0x7c: /* FCMGT */
7765         case 0x7a: /* FABD */
7766             break;
7767         default:
7768             unallocated_encoding(s);
7769             return;
7770         }
7771
7772         if (!fp_access_check(s)) {
7773             return;
7774         }
7775
7776         handle_3same_float(s, extract32(size, 0, 1), 1, fpopcode, rd, rn, rm);
7777         return;
7778     }
7779
7780     switch (opcode) {
7781     case 0x1: /* SQADD, UQADD */
7782     case 0x5: /* SQSUB, UQSUB */
7783     case 0x9: /* SQSHL, UQSHL */
7784     case 0xb: /* SQRSHL, UQRSHL */
7785         break;
7786     case 0x8: /* SSHL, USHL */
7787     case 0xa: /* SRSHL, URSHL */
7788     case 0x6: /* CMGT, CMHI */
7789     case 0x7: /* CMGE, CMHS */
7790     case 0x11: /* CMTST, CMEQ */
7791     case 0x10: /* ADD, SUB (vector) */
7792         if (size != 3) {
7793             unallocated_encoding(s);
7794             return;
7795         }
7796         break;
7797     case 0x16: /* SQDMULH, SQRDMULH (vector) */
7798         if (size != 1 && size != 2) {
7799             unallocated_encoding(s);
7800             return;
7801         }
7802         break;
7803     default:
7804         unallocated_encoding(s);
7805         return;
7806     }
7807
7808     if (!fp_access_check(s)) {
7809         return;
7810     }
7811
7812     tcg_rd = tcg_temp_new_i64();
7813
7814     if (size == 3) {
7815         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
7816         TCGv_i64 tcg_rm = read_fp_dreg(s, rm);
7817
7818         handle_3same_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rm);
7819         tcg_temp_free_i64(tcg_rn);
7820         tcg_temp_free_i64(tcg_rm);
7821     } else {
7822         /* Do a single operation on the lowest element in the vector.
7823          * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
7824          * no side effects for all these operations.
7825          * OPTME: special-purpose helpers would avoid doing some
7826          * unnecessary work in the helper for the 8 and 16 bit cases.
7827          */
7828         NeonGenTwoOpEnvFn *genenvfn;
7829         TCGv_i32 tcg_rn = tcg_temp_new_i32();
7830         TCGv_i32 tcg_rm = tcg_temp_new_i32();
7831         TCGv_i32 tcg_rd32 = tcg_temp_new_i32();
7832
7833         read_vec_element_i32(s, tcg_rn, rn, 0, size);
7834         read_vec_element_i32(s, tcg_rm, rm, 0, size);
7835
7836         switch (opcode) {
7837         case 0x1: /* SQADD, UQADD */
7838         {
7839             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7840                 { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
7841                 { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
7842                 { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
7843             };
7844             genenvfn = fns[size][u];
7845             break;
7846         }
7847         case 0x5: /* SQSUB, UQSUB */
7848         {
7849             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7850                 { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
7851                 { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
7852                 { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
7853             };
7854             genenvfn = fns[size][u];
7855             break;
7856         }
7857         case 0x9: /* SQSHL, UQSHL */
7858         {
7859             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7860                 { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
7861                 { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
7862                 { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
7863             };
7864             genenvfn = fns[size][u];
7865             break;
7866         }
7867         case 0xb: /* SQRSHL, UQRSHL */
7868         {
7869             static NeonGenTwoOpEnvFn * const fns[3][2] = {
7870                 { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
7871                 { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
7872                 { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
7873             };
7874             genenvfn = fns[size][u];
7875             break;
7876         }
7877         case 0x16: /* SQDMULH, SQRDMULH */
7878         {
7879             static NeonGenTwoOpEnvFn * const fns[2][2] = {
7880                 { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
7881                 { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
7882             };
7883             assert(size == 1 || size == 2);
7884             genenvfn = fns[size - 1][u];
7885             break;
7886         }
7887         default:
7888             g_assert_not_reached();
7889         }
7890
7891         genenvfn(tcg_rd32, cpu_env, tcg_rn, tcg_rm);
7892         tcg_gen_extu_i32_i64(tcg_rd, tcg_rd32);
7893         tcg_temp_free_i32(tcg_rd32);
7894         tcg_temp_free_i32(tcg_rn);
7895         tcg_temp_free_i32(tcg_rm);
7896     }
7897
7898     write_fp_dreg(s, rd, tcg_rd);
7899
7900     tcg_temp_free_i64(tcg_rd);
7901 }
7902
7903 /* AdvSIMD scalar three same FP16
7904  *  31 30  29 28       24 23  22 21 20  16 15 14 13    11 10  9  5 4  0
7905  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
7906  * | 0 1 | U | 1 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 | Rn | Rd |
7907  * +-----+---+-----------+---+-----+------+-----+--------+---+----+----+
7908  * v: 0101 1110 0100 0000 0000 0100 0000 0000 => 5e400400
7909  * m: 1101 1111 0110 0000 1100 0100 0000 0000 => df60c400
7910  */
7911 static void disas_simd_scalar_three_reg_same_fp16(DisasContext *s,
7912                                                   uint32_t insn)
7913 {
7914     int rd = extract32(insn, 0, 5);
7915     int rn = extract32(insn, 5, 5);
7916     int opcode = extract32(insn, 11, 3);
7917     int rm = extract32(insn, 16, 5);
7918     bool u = extract32(insn, 29, 1);
7919     bool a = extract32(insn, 23, 1);
7920     int fpopcode = opcode | (a << 3) |  (u << 4);
7921     TCGv_ptr fpst;
7922     TCGv_i32 tcg_op1;
7923     TCGv_i32 tcg_op2;
7924     TCGv_i32 tcg_res;
7925
7926     switch (fpopcode) {
7927     case 0x03: /* FMULX */
7928     case 0x04: /* FCMEQ (reg) */
7929     case 0x07: /* FRECPS */
7930     case 0x0f: /* FRSQRTS */
7931     case 0x14: /* FCMGE (reg) */
7932     case 0x15: /* FACGE */
7933     case 0x1a: /* FABD */
7934     case 0x1c: /* FCMGT (reg) */
7935     case 0x1d: /* FACGT */
7936         break;
7937     default:
7938         unallocated_encoding(s);
7939         return;
7940     }
7941
7942     if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
7943         unallocated_encoding(s);
7944     }
7945
7946     if (!fp_access_check(s)) {
7947         return;
7948     }
7949
7950     fpst = get_fpstatus_ptr(true);
7951
7952     tcg_op1 = tcg_temp_new_i32();
7953     tcg_op2 = tcg_temp_new_i32();
7954     tcg_res = tcg_temp_new_i32();
7955
7956     read_vec_element_i32(s, tcg_op1, rn, 0, MO_16);
7957     read_vec_element_i32(s, tcg_op2, rm, 0, MO_16);
7958
7959     switch (fpopcode) {
7960     case 0x03: /* FMULX */
7961         gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
7962         break;
7963     case 0x04: /* FCMEQ (reg) */
7964         gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7965         break;
7966     case 0x07: /* FRECPS */
7967         gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7968         break;
7969     case 0x0f: /* FRSQRTS */
7970         gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7971         break;
7972     case 0x14: /* FCMGE (reg) */
7973         gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7974         break;
7975     case 0x15: /* FACGE */
7976         gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7977         break;
7978     case 0x1a: /* FABD */
7979         gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
7980         tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
7981         break;
7982     case 0x1c: /* FCMGT (reg) */
7983         gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7984         break;
7985     case 0x1d: /* FACGT */
7986         gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
7987         break;
7988     default:
7989         g_assert_not_reached();
7990     }
7991
7992     write_fp_sreg(s, rd, tcg_res);
7993
7994
7995     tcg_temp_free_i32(tcg_res);
7996     tcg_temp_free_i32(tcg_op1);
7997     tcg_temp_free_i32(tcg_op2);
7998     tcg_temp_free_ptr(fpst);
7999 }
8000
8001 /* AdvSIMD scalar three same extra
8002  *  31 30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
8003  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
8004  * | 0 1 | U | 1 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
8005  * +-----+---+-----------+------+---+------+---+--------+---+----+----+
8006  */
8007 static void disas_simd_scalar_three_reg_same_extra(DisasContext *s,
8008                                                    uint32_t insn)
8009 {
8010     int rd = extract32(insn, 0, 5);
8011     int rn = extract32(insn, 5, 5);
8012     int opcode = extract32(insn, 11, 4);
8013     int rm = extract32(insn, 16, 5);
8014     int size = extract32(insn, 22, 2);
8015     bool u = extract32(insn, 29, 1);
8016     TCGv_i32 ele1, ele2, ele3;
8017     TCGv_i64 res;
8018     int feature;
8019
8020     switch (u * 16 + opcode) {
8021     case 0x10: /* SQRDMLAH (vector) */
8022     case 0x11: /* SQRDMLSH (vector) */
8023         if (size != 1 && size != 2) {
8024             unallocated_encoding(s);
8025             return;
8026         }
8027         feature = ARM_FEATURE_V8_RDM;
8028         break;
8029     default:
8030         unallocated_encoding(s);
8031         return;
8032     }
8033     if (!arm_dc_feature(s, feature)) {
8034         unallocated_encoding(s);
8035         return;
8036     }
8037     if (!fp_access_check(s)) {
8038         return;
8039     }
8040
8041     /* Do a single operation on the lowest element in the vector.
8042      * We use the standard Neon helpers and rely on 0 OP 0 == 0
8043      * with no side effects for all these operations.
8044      * OPTME: special-purpose helpers would avoid doing some
8045      * unnecessary work in the helper for the 16 bit cases.
8046      */
8047     ele1 = tcg_temp_new_i32();
8048     ele2 = tcg_temp_new_i32();
8049     ele3 = tcg_temp_new_i32();
8050
8051     read_vec_element_i32(s, ele1, rn, 0, size);
8052     read_vec_element_i32(s, ele2, rm, 0, size);
8053     read_vec_element_i32(s, ele3, rd, 0, size);
8054
8055     switch (opcode) {
8056     case 0x0: /* SQRDMLAH */
8057         if (size == 1) {
8058             gen_helper_neon_qrdmlah_s16(ele3, cpu_env, ele1, ele2, ele3);
8059         } else {
8060             gen_helper_neon_qrdmlah_s32(ele3, cpu_env, ele1, ele2, ele3);
8061         }
8062         break;
8063     case 0x1: /* SQRDMLSH */
8064         if (size == 1) {
8065             gen_helper_neon_qrdmlsh_s16(ele3, cpu_env, ele1, ele2, ele3);
8066         } else {
8067             gen_helper_neon_qrdmlsh_s32(ele3, cpu_env, ele1, ele2, ele3);
8068         }
8069         break;
8070     default:
8071         g_assert_not_reached();
8072     }
8073     tcg_temp_free_i32(ele1);
8074     tcg_temp_free_i32(ele2);
8075
8076     res = tcg_temp_new_i64();
8077     tcg_gen_extu_i32_i64(res, ele3);
8078     tcg_temp_free_i32(ele3);
8079
8080     write_fp_dreg(s, rd, res);
8081     tcg_temp_free_i64(res);
8082 }
8083
8084 static void handle_2misc_64(DisasContext *s, int opcode, bool u,
8085                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn,
8086                             TCGv_i32 tcg_rmode, TCGv_ptr tcg_fpstatus)
8087 {
8088     /* Handle 64->64 opcodes which are shared between the scalar and
8089      * vector 2-reg-misc groups. We cover every integer opcode where size == 3
8090      * is valid in either group and also the double-precision fp ops.
8091      * The caller only need provide tcg_rmode and tcg_fpstatus if the op
8092      * requires them.
8093      */
8094     TCGCond cond;
8095
8096     switch (opcode) {
8097     case 0x4: /* CLS, CLZ */
8098         if (u) {
8099             tcg_gen_clzi_i64(tcg_rd, tcg_rn, 64);
8100         } else {
8101             tcg_gen_clrsb_i64(tcg_rd, tcg_rn);
8102         }
8103         break;
8104     case 0x5: /* NOT */
8105         /* This opcode is shared with CNT and RBIT but we have earlier
8106          * enforced that size == 3 if and only if this is the NOT insn.
8107          */
8108         tcg_gen_not_i64(tcg_rd, tcg_rn);
8109         break;
8110     case 0x7: /* SQABS, SQNEG */
8111         if (u) {
8112             gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
8113         } else {
8114             gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
8115         }
8116         break;
8117     case 0xa: /* CMLT */
8118         /* 64 bit integer comparison against zero, result is
8119          * test ? (2^64 - 1) : 0. We implement via setcond(!test) and
8120          * subtracting 1.
8121          */
8122         cond = TCG_COND_LT;
8123     do_cmop:
8124         tcg_gen_setcondi_i64(cond, tcg_rd, tcg_rn, 0);
8125         tcg_gen_neg_i64(tcg_rd, tcg_rd);
8126         break;
8127     case 0x8: /* CMGT, CMGE */
8128         cond = u ? TCG_COND_GE : TCG_COND_GT;
8129         goto do_cmop;
8130     case 0x9: /* CMEQ, CMLE */
8131         cond = u ? TCG_COND_LE : TCG_COND_EQ;
8132         goto do_cmop;
8133     case 0xb: /* ABS, NEG */
8134         if (u) {
8135             tcg_gen_neg_i64(tcg_rd, tcg_rn);
8136         } else {
8137             TCGv_i64 tcg_zero = tcg_const_i64(0);
8138             tcg_gen_neg_i64(tcg_rd, tcg_rn);
8139             tcg_gen_movcond_i64(TCG_COND_GT, tcg_rd, tcg_rn, tcg_zero,
8140                                 tcg_rn, tcg_rd);
8141             tcg_temp_free_i64(tcg_zero);
8142         }
8143         break;
8144     case 0x2f: /* FABS */
8145         gen_helper_vfp_absd(tcg_rd, tcg_rn);
8146         break;
8147     case 0x6f: /* FNEG */
8148         gen_helper_vfp_negd(tcg_rd, tcg_rn);
8149         break;
8150     case 0x7f: /* FSQRT */
8151         gen_helper_vfp_sqrtd(tcg_rd, tcg_rn, cpu_env);
8152         break;
8153     case 0x1a: /* FCVTNS */
8154     case 0x1b: /* FCVTMS */
8155     case 0x1c: /* FCVTAS */
8156     case 0x3a: /* FCVTPS */
8157     case 0x3b: /* FCVTZS */
8158     {
8159         TCGv_i32 tcg_shift = tcg_const_i32(0);
8160         gen_helper_vfp_tosqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8161         tcg_temp_free_i32(tcg_shift);
8162         break;
8163     }
8164     case 0x5a: /* FCVTNU */
8165     case 0x5b: /* FCVTMU */
8166     case 0x5c: /* FCVTAU */
8167     case 0x7a: /* FCVTPU */
8168     case 0x7b: /* FCVTZU */
8169     {
8170         TCGv_i32 tcg_shift = tcg_const_i32(0);
8171         gen_helper_vfp_touqd(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8172         tcg_temp_free_i32(tcg_shift);
8173         break;
8174     }
8175     case 0x18: /* FRINTN */
8176     case 0x19: /* FRINTM */
8177     case 0x38: /* FRINTP */
8178     case 0x39: /* FRINTZ */
8179     case 0x58: /* FRINTA */
8180     case 0x79: /* FRINTI */
8181         gen_helper_rintd(tcg_rd, tcg_rn, tcg_fpstatus);
8182         break;
8183     case 0x59: /* FRINTX */
8184         gen_helper_rintd_exact(tcg_rd, tcg_rn, tcg_fpstatus);
8185         break;
8186     default:
8187         g_assert_not_reached();
8188     }
8189 }
8190
8191 static void handle_2misc_fcmp_zero(DisasContext *s, int opcode,
8192                                    bool is_scalar, bool is_u, bool is_q,
8193                                    int size, int rn, int rd)
8194 {
8195     bool is_double = (size == MO_64);
8196     TCGv_ptr fpst;
8197
8198     if (!fp_access_check(s)) {
8199         return;
8200     }
8201
8202     fpst = get_fpstatus_ptr(size == MO_16);
8203
8204     if (is_double) {
8205         TCGv_i64 tcg_op = tcg_temp_new_i64();
8206         TCGv_i64 tcg_zero = tcg_const_i64(0);
8207         TCGv_i64 tcg_res = tcg_temp_new_i64();
8208         NeonGenTwoDoubleOPFn *genfn;
8209         bool swap = false;
8210         int pass;
8211
8212         switch (opcode) {
8213         case 0x2e: /* FCMLT (zero) */
8214             swap = true;
8215             /* fallthrough */
8216         case 0x2c: /* FCMGT (zero) */
8217             genfn = gen_helper_neon_cgt_f64;
8218             break;
8219         case 0x2d: /* FCMEQ (zero) */
8220             genfn = gen_helper_neon_ceq_f64;
8221             break;
8222         case 0x6d: /* FCMLE (zero) */
8223             swap = true;
8224             /* fall through */
8225         case 0x6c: /* FCMGE (zero) */
8226             genfn = gen_helper_neon_cge_f64;
8227             break;
8228         default:
8229             g_assert_not_reached();
8230         }
8231
8232         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8233             read_vec_element(s, tcg_op, rn, pass, MO_64);
8234             if (swap) {
8235                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
8236             } else {
8237                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
8238             }
8239             write_vec_element(s, tcg_res, rd, pass, MO_64);
8240         }
8241         tcg_temp_free_i64(tcg_res);
8242         tcg_temp_free_i64(tcg_zero);
8243         tcg_temp_free_i64(tcg_op);
8244
8245         clear_vec_high(s, !is_scalar, rd);
8246     } else {
8247         TCGv_i32 tcg_op = tcg_temp_new_i32();
8248         TCGv_i32 tcg_zero = tcg_const_i32(0);
8249         TCGv_i32 tcg_res = tcg_temp_new_i32();
8250         NeonGenTwoSingleOPFn *genfn;
8251         bool swap = false;
8252         int pass, maxpasses;
8253
8254         if (size == MO_16) {
8255             switch (opcode) {
8256             case 0x2e: /* FCMLT (zero) */
8257                 swap = true;
8258                 /* fall through */
8259             case 0x2c: /* FCMGT (zero) */
8260                 genfn = gen_helper_advsimd_cgt_f16;
8261                 break;
8262             case 0x2d: /* FCMEQ (zero) */
8263                 genfn = gen_helper_advsimd_ceq_f16;
8264                 break;
8265             case 0x6d: /* FCMLE (zero) */
8266                 swap = true;
8267                 /* fall through */
8268             case 0x6c: /* FCMGE (zero) */
8269                 genfn = gen_helper_advsimd_cge_f16;
8270                 break;
8271             default:
8272                 g_assert_not_reached();
8273             }
8274         } else {
8275             switch (opcode) {
8276             case 0x2e: /* FCMLT (zero) */
8277                 swap = true;
8278                 /* fall through */
8279             case 0x2c: /* FCMGT (zero) */
8280                 genfn = gen_helper_neon_cgt_f32;
8281                 break;
8282             case 0x2d: /* FCMEQ (zero) */
8283                 genfn = gen_helper_neon_ceq_f32;
8284                 break;
8285             case 0x6d: /* FCMLE (zero) */
8286                 swap = true;
8287                 /* fall through */
8288             case 0x6c: /* FCMGE (zero) */
8289                 genfn = gen_helper_neon_cge_f32;
8290                 break;
8291             default:
8292                 g_assert_not_reached();
8293             }
8294         }
8295
8296         if (is_scalar) {
8297             maxpasses = 1;
8298         } else {
8299             int vector_size = 8 << is_q;
8300             maxpasses = vector_size >> size;
8301         }
8302
8303         for (pass = 0; pass < maxpasses; pass++) {
8304             read_vec_element_i32(s, tcg_op, rn, pass, size);
8305             if (swap) {
8306                 genfn(tcg_res, tcg_zero, tcg_op, fpst);
8307             } else {
8308                 genfn(tcg_res, tcg_op, tcg_zero, fpst);
8309             }
8310             if (is_scalar) {
8311                 write_fp_sreg(s, rd, tcg_res);
8312             } else {
8313                 write_vec_element_i32(s, tcg_res, rd, pass, size);
8314             }
8315         }
8316         tcg_temp_free_i32(tcg_res);
8317         tcg_temp_free_i32(tcg_zero);
8318         tcg_temp_free_i32(tcg_op);
8319         if (!is_scalar) {
8320             clear_vec_high(s, is_q, rd);
8321         }
8322     }
8323
8324     tcg_temp_free_ptr(fpst);
8325 }
8326
8327 static void handle_2misc_reciprocal(DisasContext *s, int opcode,
8328                                     bool is_scalar, bool is_u, bool is_q,
8329                                     int size, int rn, int rd)
8330 {
8331     bool is_double = (size == 3);
8332     TCGv_ptr fpst = get_fpstatus_ptr(false);
8333
8334     if (is_double) {
8335         TCGv_i64 tcg_op = tcg_temp_new_i64();
8336         TCGv_i64 tcg_res = tcg_temp_new_i64();
8337         int pass;
8338
8339         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8340             read_vec_element(s, tcg_op, rn, pass, MO_64);
8341             switch (opcode) {
8342             case 0x3d: /* FRECPE */
8343                 gen_helper_recpe_f64(tcg_res, tcg_op, fpst);
8344                 break;
8345             case 0x3f: /* FRECPX */
8346                 gen_helper_frecpx_f64(tcg_res, tcg_op, fpst);
8347                 break;
8348             case 0x7d: /* FRSQRTE */
8349                 gen_helper_rsqrte_f64(tcg_res, tcg_op, fpst);
8350                 break;
8351             default:
8352                 g_assert_not_reached();
8353             }
8354             write_vec_element(s, tcg_res, rd, pass, MO_64);
8355         }
8356         tcg_temp_free_i64(tcg_res);
8357         tcg_temp_free_i64(tcg_op);
8358         clear_vec_high(s, !is_scalar, rd);
8359     } else {
8360         TCGv_i32 tcg_op = tcg_temp_new_i32();
8361         TCGv_i32 tcg_res = tcg_temp_new_i32();
8362         int pass, maxpasses;
8363
8364         if (is_scalar) {
8365             maxpasses = 1;
8366         } else {
8367             maxpasses = is_q ? 4 : 2;
8368         }
8369
8370         for (pass = 0; pass < maxpasses; pass++) {
8371             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
8372
8373             switch (opcode) {
8374             case 0x3c: /* URECPE */
8375                 gen_helper_recpe_u32(tcg_res, tcg_op, fpst);
8376                 break;
8377             case 0x3d: /* FRECPE */
8378                 gen_helper_recpe_f32(tcg_res, tcg_op, fpst);
8379                 break;
8380             case 0x3f: /* FRECPX */
8381                 gen_helper_frecpx_f32(tcg_res, tcg_op, fpst);
8382                 break;
8383             case 0x7d: /* FRSQRTE */
8384                 gen_helper_rsqrte_f32(tcg_res, tcg_op, fpst);
8385                 break;
8386             default:
8387                 g_assert_not_reached();
8388             }
8389
8390             if (is_scalar) {
8391                 write_fp_sreg(s, rd, tcg_res);
8392             } else {
8393                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
8394             }
8395         }
8396         tcg_temp_free_i32(tcg_res);
8397         tcg_temp_free_i32(tcg_op);
8398         if (!is_scalar) {
8399             clear_vec_high(s, is_q, rd);
8400         }
8401     }
8402     tcg_temp_free_ptr(fpst);
8403 }
8404
8405 static void handle_2misc_narrow(DisasContext *s, bool scalar,
8406                                 int opcode, bool u, bool is_q,
8407                                 int size, int rn, int rd)
8408 {
8409     /* Handle 2-reg-misc ops which are narrowing (so each 2*size element
8410      * in the source becomes a size element in the destination).
8411      */
8412     int pass;
8413     TCGv_i32 tcg_res[2];
8414     int destelt = is_q ? 2 : 0;
8415     int passes = scalar ? 1 : 2;
8416
8417     if (scalar) {
8418         tcg_res[1] = tcg_const_i32(0);
8419     }
8420
8421     for (pass = 0; pass < passes; pass++) {
8422         TCGv_i64 tcg_op = tcg_temp_new_i64();
8423         NeonGenNarrowFn *genfn = NULL;
8424         NeonGenNarrowEnvFn *genenvfn = NULL;
8425
8426         if (scalar) {
8427             read_vec_element(s, tcg_op, rn, pass, size + 1);
8428         } else {
8429             read_vec_element(s, tcg_op, rn, pass, MO_64);
8430         }
8431         tcg_res[pass] = tcg_temp_new_i32();
8432
8433         switch (opcode) {
8434         case 0x12: /* XTN, SQXTUN */
8435         {
8436             static NeonGenNarrowFn * const xtnfns[3] = {
8437                 gen_helper_neon_narrow_u8,
8438                 gen_helper_neon_narrow_u16,
8439                 tcg_gen_extrl_i64_i32,
8440             };
8441             static NeonGenNarrowEnvFn * const sqxtunfns[3] = {
8442                 gen_helper_neon_unarrow_sat8,
8443                 gen_helper_neon_unarrow_sat16,
8444                 gen_helper_neon_unarrow_sat32,
8445             };
8446             if (u) {
8447                 genenvfn = sqxtunfns[size];
8448             } else {
8449                 genfn = xtnfns[size];
8450             }
8451             break;
8452         }
8453         case 0x14: /* SQXTN, UQXTN */
8454         {
8455             static NeonGenNarrowEnvFn * const fns[3][2] = {
8456                 { gen_helper_neon_narrow_sat_s8,
8457                   gen_helper_neon_narrow_sat_u8 },
8458                 { gen_helper_neon_narrow_sat_s16,
8459                   gen_helper_neon_narrow_sat_u16 },
8460                 { gen_helper_neon_narrow_sat_s32,
8461                   gen_helper_neon_narrow_sat_u32 },
8462             };
8463             genenvfn = fns[size][u];
8464             break;
8465         }
8466         case 0x16: /* FCVTN, FCVTN2 */
8467             /* 32 bit to 16 bit or 64 bit to 32 bit float conversion */
8468             if (size == 2) {
8469                 gen_helper_vfp_fcvtsd(tcg_res[pass], tcg_op, cpu_env);
8470             } else {
8471                 TCGv_i32 tcg_lo = tcg_temp_new_i32();
8472                 TCGv_i32 tcg_hi = tcg_temp_new_i32();
8473                 tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, tcg_op);
8474                 gen_helper_vfp_fcvt_f32_to_f16(tcg_lo, tcg_lo, cpu_env);
8475                 gen_helper_vfp_fcvt_f32_to_f16(tcg_hi, tcg_hi, cpu_env);
8476                 tcg_gen_deposit_i32(tcg_res[pass], tcg_lo, tcg_hi, 16, 16);
8477                 tcg_temp_free_i32(tcg_lo);
8478                 tcg_temp_free_i32(tcg_hi);
8479             }
8480             break;
8481         case 0x56:  /* FCVTXN, FCVTXN2 */
8482             /* 64 bit to 32 bit float conversion
8483              * with von Neumann rounding (round to odd)
8484              */
8485             assert(size == 2);
8486             gen_helper_fcvtx_f64_to_f32(tcg_res[pass], tcg_op, cpu_env);
8487             break;
8488         default:
8489             g_assert_not_reached();
8490         }
8491
8492         if (genfn) {
8493             genfn(tcg_res[pass], tcg_op);
8494         } else if (genenvfn) {
8495             genenvfn(tcg_res[pass], cpu_env, tcg_op);
8496         }
8497
8498         tcg_temp_free_i64(tcg_op);
8499     }
8500
8501     for (pass = 0; pass < 2; pass++) {
8502         write_vec_element_i32(s, tcg_res[pass], rd, destelt + pass, MO_32);
8503         tcg_temp_free_i32(tcg_res[pass]);
8504     }
8505     clear_vec_high(s, is_q, rd);
8506 }
8507
8508 /* Remaining saturating accumulating ops */
8509 static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
8510                                 bool is_q, int size, int rn, int rd)
8511 {
8512     bool is_double = (size == 3);
8513
8514     if (is_double) {
8515         TCGv_i64 tcg_rn = tcg_temp_new_i64();
8516         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8517         int pass;
8518
8519         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
8520             read_vec_element(s, tcg_rn, rn, pass, MO_64);
8521             read_vec_element(s, tcg_rd, rd, pass, MO_64);
8522
8523             if (is_u) { /* USQADD */
8524                 gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8525             } else { /* SUQADD */
8526                 gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8527             }
8528             write_vec_element(s, tcg_rd, rd, pass, MO_64);
8529         }
8530         tcg_temp_free_i64(tcg_rd);
8531         tcg_temp_free_i64(tcg_rn);
8532         clear_vec_high(s, !is_scalar, rd);
8533     } else {
8534         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8535         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8536         int pass, maxpasses;
8537
8538         if (is_scalar) {
8539             maxpasses = 1;
8540         } else {
8541             maxpasses = is_q ? 4 : 2;
8542         }
8543
8544         for (pass = 0; pass < maxpasses; pass++) {
8545             if (is_scalar) {
8546                 read_vec_element_i32(s, tcg_rn, rn, pass, size);
8547                 read_vec_element_i32(s, tcg_rd, rd, pass, size);
8548             } else {
8549                 read_vec_element_i32(s, tcg_rn, rn, pass, MO_32);
8550                 read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8551             }
8552
8553             if (is_u) { /* USQADD */
8554                 switch (size) {
8555                 case 0:
8556                     gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8557                     break;
8558                 case 1:
8559                     gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8560                     break;
8561                 case 2:
8562                     gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8563                     break;
8564                 default:
8565                     g_assert_not_reached();
8566                 }
8567             } else { /* SUQADD */
8568                 switch (size) {
8569                 case 0:
8570                     gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8571                     break;
8572                 case 1:
8573                     gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8574                     break;
8575                 case 2:
8576                     gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
8577                     break;
8578                 default:
8579                     g_assert_not_reached();
8580                 }
8581             }
8582
8583             if (is_scalar) {
8584                 TCGv_i64 tcg_zero = tcg_const_i64(0);
8585                 write_vec_element(s, tcg_zero, rd, 0, MO_64);
8586                 tcg_temp_free_i64(tcg_zero);
8587             }
8588             write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
8589         }
8590         tcg_temp_free_i32(tcg_rd);
8591         tcg_temp_free_i32(tcg_rn);
8592         clear_vec_high(s, is_q, rd);
8593     }
8594 }
8595
8596 /* AdvSIMD scalar two reg misc
8597  *  31 30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
8598  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8599  * | 0 1 | U | 1 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
8600  * +-----+---+-----------+------+-----------+--------+-----+------+------+
8601  */
8602 static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
8603 {
8604     int rd = extract32(insn, 0, 5);
8605     int rn = extract32(insn, 5, 5);
8606     int opcode = extract32(insn, 12, 5);
8607     int size = extract32(insn, 22, 2);
8608     bool u = extract32(insn, 29, 1);
8609     bool is_fcvt = false;
8610     int rmode;
8611     TCGv_i32 tcg_rmode;
8612     TCGv_ptr tcg_fpstatus;
8613
8614     switch (opcode) {
8615     case 0x3: /* USQADD / SUQADD*/
8616         if (!fp_access_check(s)) {
8617             return;
8618         }
8619         handle_2misc_satacc(s, true, u, false, size, rn, rd);
8620         return;
8621     case 0x7: /* SQABS / SQNEG */
8622         break;
8623     case 0xa: /* CMLT */
8624         if (u) {
8625             unallocated_encoding(s);
8626             return;
8627         }
8628         /* fall through */
8629     case 0x8: /* CMGT, CMGE */
8630     case 0x9: /* CMEQ, CMLE */
8631     case 0xb: /* ABS, NEG */
8632         if (size != 3) {
8633             unallocated_encoding(s);
8634             return;
8635         }
8636         break;
8637     case 0x12: /* SQXTUN */
8638         if (!u) {
8639             unallocated_encoding(s);
8640             return;
8641         }
8642         /* fall through */
8643     case 0x14: /* SQXTN, UQXTN */
8644         if (size == 3) {
8645             unallocated_encoding(s);
8646             return;
8647         }
8648         if (!fp_access_check(s)) {
8649             return;
8650         }
8651         handle_2misc_narrow(s, true, opcode, u, false, size, rn, rd);
8652         return;
8653     case 0xc ... 0xf:
8654     case 0x16 ... 0x1d:
8655     case 0x1f:
8656         /* Floating point: U, size[1] and opcode indicate operation;
8657          * size[0] indicates single or double precision.
8658          */
8659         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
8660         size = extract32(size, 0, 1) ? 3 : 2;
8661         switch (opcode) {
8662         case 0x2c: /* FCMGT (zero) */
8663         case 0x2d: /* FCMEQ (zero) */
8664         case 0x2e: /* FCMLT (zero) */
8665         case 0x6c: /* FCMGE (zero) */
8666         case 0x6d: /* FCMLE (zero) */
8667             handle_2misc_fcmp_zero(s, opcode, true, u, true, size, rn, rd);
8668             return;
8669         case 0x1d: /* SCVTF */
8670         case 0x5d: /* UCVTF */
8671         {
8672             bool is_signed = (opcode == 0x1d);
8673             if (!fp_access_check(s)) {
8674                 return;
8675             }
8676             handle_simd_intfp_conv(s, rd, rn, 1, is_signed, 0, size);
8677             return;
8678         }
8679         case 0x3d: /* FRECPE */
8680         case 0x3f: /* FRECPX */
8681         case 0x7d: /* FRSQRTE */
8682             if (!fp_access_check(s)) {
8683                 return;
8684             }
8685             handle_2misc_reciprocal(s, opcode, true, u, true, size, rn, rd);
8686             return;
8687         case 0x1a: /* FCVTNS */
8688         case 0x1b: /* FCVTMS */
8689         case 0x3a: /* FCVTPS */
8690         case 0x3b: /* FCVTZS */
8691         case 0x5a: /* FCVTNU */
8692         case 0x5b: /* FCVTMU */
8693         case 0x7a: /* FCVTPU */
8694         case 0x7b: /* FCVTZU */
8695             is_fcvt = true;
8696             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
8697             break;
8698         case 0x1c: /* FCVTAS */
8699         case 0x5c: /* FCVTAU */
8700             /* TIEAWAY doesn't fit in the usual rounding mode encoding */
8701             is_fcvt = true;
8702             rmode = FPROUNDING_TIEAWAY;
8703             break;
8704         case 0x56: /* FCVTXN, FCVTXN2 */
8705             if (size == 2) {
8706                 unallocated_encoding(s);
8707                 return;
8708             }
8709             if (!fp_access_check(s)) {
8710                 return;
8711             }
8712             handle_2misc_narrow(s, true, opcode, u, false, size - 1, rn, rd);
8713             return;
8714         default:
8715             unallocated_encoding(s);
8716             return;
8717         }
8718         break;
8719     default:
8720         unallocated_encoding(s);
8721         return;
8722     }
8723
8724     if (!fp_access_check(s)) {
8725         return;
8726     }
8727
8728     if (is_fcvt) {
8729         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
8730         tcg_fpstatus = get_fpstatus_ptr(false);
8731         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8732     } else {
8733         tcg_rmode = NULL;
8734         tcg_fpstatus = NULL;
8735     }
8736
8737     if (size == 3) {
8738         TCGv_i64 tcg_rn = read_fp_dreg(s, rn);
8739         TCGv_i64 tcg_rd = tcg_temp_new_i64();
8740
8741         handle_2misc_64(s, opcode, u, tcg_rd, tcg_rn, tcg_rmode, tcg_fpstatus);
8742         write_fp_dreg(s, rd, tcg_rd);
8743         tcg_temp_free_i64(tcg_rd);
8744         tcg_temp_free_i64(tcg_rn);
8745     } else {
8746         TCGv_i32 tcg_rn = tcg_temp_new_i32();
8747         TCGv_i32 tcg_rd = tcg_temp_new_i32();
8748
8749         read_vec_element_i32(s, tcg_rn, rn, 0, size);
8750
8751         switch (opcode) {
8752         case 0x7: /* SQABS, SQNEG */
8753         {
8754             NeonGenOneOpEnvFn *genfn;
8755             static NeonGenOneOpEnvFn * const fns[3][2] = {
8756                 { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
8757                 { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
8758                 { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
8759             };
8760             genfn = fns[size][u];
8761             genfn(tcg_rd, cpu_env, tcg_rn);
8762             break;
8763         }
8764         case 0x1a: /* FCVTNS */
8765         case 0x1b: /* FCVTMS */
8766         case 0x1c: /* FCVTAS */
8767         case 0x3a: /* FCVTPS */
8768         case 0x3b: /* FCVTZS */
8769         {
8770             TCGv_i32 tcg_shift = tcg_const_i32(0);
8771             gen_helper_vfp_tosls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8772             tcg_temp_free_i32(tcg_shift);
8773             break;
8774         }
8775         case 0x5a: /* FCVTNU */
8776         case 0x5b: /* FCVTMU */
8777         case 0x5c: /* FCVTAU */
8778         case 0x7a: /* FCVTPU */
8779         case 0x7b: /* FCVTZU */
8780         {
8781             TCGv_i32 tcg_shift = tcg_const_i32(0);
8782             gen_helper_vfp_touls(tcg_rd, tcg_rn, tcg_shift, tcg_fpstatus);
8783             tcg_temp_free_i32(tcg_shift);
8784             break;
8785         }
8786         default:
8787             g_assert_not_reached();
8788         }
8789
8790         write_fp_sreg(s, rd, tcg_rd);
8791         tcg_temp_free_i32(tcg_rd);
8792         tcg_temp_free_i32(tcg_rn);
8793     }
8794
8795     if (is_fcvt) {
8796         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
8797         tcg_temp_free_i32(tcg_rmode);
8798         tcg_temp_free_ptr(tcg_fpstatus);
8799     }
8800 }
8801
/* SSRA step for 8-bit elements held in a 64-bit value: @a is shifted
 * right in place by tcg_gen_vec_sar8i_i64() (so @a is clobbered) and
 * the result is accumulated into @d by tcg_gen_vec_add8_i64().
 */
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
8807
/* SSRA step for 16-bit elements held in a 64-bit value: @a is shifted
 * right in place by tcg_gen_vec_sar16i_i64() (so @a is clobbered) and
 * the result is accumulated into @d by tcg_gen_vec_add16_i64().
 */
static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
8813
/* SSRA step for a single 32-bit element: arithmetic shift right of @a
 * by @shift, done in place (so @a is clobbered), then @d += @a.
 */
static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
8819
/* SSRA step for a single 64-bit element: arithmetic shift right of @a
 * by @shift, done in place (so @a is clobbered), then @d += @a.
 */
static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
8825
/* SSRA step on host vector values with element size @vece: arithmetic
 * shift right of @a by @sh, done in place (so @a is clobbered), then
 * an element-wise add of the result into @d.
 */
static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
8831
/* USRA step for 8-bit elements held in a 64-bit value: @a is shifted
 * right in place by tcg_gen_vec_shr8i_i64() (so @a is clobbered) and
 * the result is accumulated into @d by tcg_gen_vec_add8_i64().
 */
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
8837
/* USRA step for 16-bit elements held in a 64-bit value: @a is shifted
 * right in place by tcg_gen_vec_shr16i_i64() (so @a is clobbered) and
 * the result is accumulated into @d by tcg_gen_vec_add16_i64().
 */
static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
8843
/* USRA step for a single 32-bit element: logical shift right of @a by
 * @shift, done in place (so @a is clobbered), then @d += @a.
 */
static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
8849
/* USRA step for a single 64-bit element: logical shift right of @a by
 * @shift, done in place (so @a is clobbered), then @d += @a.
 */
static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
8855
/* USRA step on host vector values with element size @vece: logical
 * shift right of @a by @sh, done in place (so @a is clobbered), then
 * an element-wise add of the result into @d.
 */
static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
8861
8862 static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8863 {
8864     uint64_t mask = dup_const(MO_8, 0xff >> shift);
8865     TCGv_i64 t = tcg_temp_new_i64();
8866
8867     tcg_gen_shri_i64(t, a, shift);
8868     tcg_gen_andi_i64(t, t, mask);
8869     tcg_gen_andi_i64(d, d, ~mask);
8870     tcg_gen_or_i64(d, d, t);
8871     tcg_temp_free_i64(t);
8872 }
8873
8874 static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
8875 {
8876     uint64_t mask = dup_const(MO_16, 0xffff >> shift);
8877     TCGv_i64 t = tcg_temp_new_i64();
8878
8879     tcg_gen_shri_i64(t, a, shift);
8880     tcg_gen_andi_i64(t, t, mask);
8881     tcg_gen_andi_i64(d, d, ~mask);
8882     tcg_gen_or_i64(d, d, t);
8883     tcg_temp_free_i64(t);
8884 }
8885
/* Shift-right-and-insert (SRI) for a single 32-bit element: shift @a
 * right by @shift and deposit the resulting low (32 - shift) bits into
 * @d at bit 0, leaving the top @shift bits of @d unchanged.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}
8891
/* Shift-right-and-insert (SRI) for a single 64-bit element: shift @a
 * right by @shift and deposit the resulting low (64 - shift) bits into
 * @d at bit 0, leaving the top @shift bits of @d unchanged.
 * Note that @a is overwritten with the shifted value.
 */
static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}
8897
8898 static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
8899 {
8900     uint64_t mask = (2ull << ((8 << vece) - 1)) - 1;
8901     TCGv_vec t = tcg_temp_new_vec_matching(d);
8902     TCGv_vec m = tcg_temp_new_vec_matching(d);
8903
8904     tcg_gen_dupi_vec(vece, m, mask ^ (mask >> sh));
8905     tcg_gen_shri_vec(vece, t, a, sh);
8906     tcg_gen_and_vec(vece, d, d, m);
8907     tcg_gen_or_vec(vece, d, d, t);
8908
8909     tcg_temp_free_vec(t);
8910     tcg_temp_free_vec(m);
8911 }
8912
/* SSHR[RA]/USHR[RA]/SRI - Vector shift right by immediate
 * (optional rounding/accumulate, or insert).
 *
 * @is_q:   operate on 128 bits (true) or 64 bits (false)
 * @is_u:   unsigned (logical) rather than signed (arithmetic) shift
 * @immh, @immb: immediate fields; the position of the highest set bit
 *          of @immh selects the element size and immh:immb encodes
 *          the shift as (2 * esize - immh:immb)
 * @opcode: 0x00 SSHR/USHR, 0x02 SSRA/USRA, 0x04 SRSHR/URSHR,
 *          0x06 SRSRA/URSRA, 0x08 SRI; anything else asserts
 * @rn, @rd: source and destination vector register numbers
 *
 * The non-rounding forms are emitted through the gvec expanders; the
 * rounding forms are emitted element by element.
 */
static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    /* Signed shift right and accumulate, indexed by element size. */
    static const GVecGen2i ssra_op[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .load_dest = true,
          .opc = INDEX_op_sari_vec,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .load_dest = true,
          .opc = INDEX_op_sari_vec,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .load_dest = true,
          .opc = INDEX_op_sari_vec,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opc = INDEX_op_sari_vec,
          .vece = MO_64 },
    };
    /* Unsigned shift right and accumulate, indexed by element size. */
    static const GVecGen2i usra_op[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_64, },
    };
    /* Shift right and insert, indexed by element size. */
    static const GVecGen2i sri_op[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_8 },
        { .fni8 = gen_shr16_ins_i64,
          .fniv = gen_shr_ins_vec,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_16 },
        { .fni4 = gen_shr32_ins_i32,
          .fniv = gen_shr_ins_vec,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_32 },
        { .fni8 = gen_shr64_ins_i64,
          .fniv = gen_shr_ins_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_64 },
    };

    /* Index of the highest set bit of immh.
     * NOTE(review): immh == 0 would give size == -1; presumably the
     * decoder never routes that encoding here - confirm against caller.
     */
    int size = 32 - clz32(immh) - 1;
    int immhb = immh << 3 | immb;
    /* Shift amount, in the range 1..esize. */
    int shift = 2 * (8 << size) - immhb;
    bool accumulate = false;
    int dsize = is_q ? 128 : 64;
    int esize = 8 << size;
    int elements = dsize/esize;
    TCGMemOp memop = size | (is_u ? 0 : MO_SIGN);
    TCGv_i64 tcg_rn = new_tmp_a64(s);
    TCGv_i64 tcg_rd = new_tmp_a64(s);
    TCGv_i64 tcg_round;
    uint64_t round_const;
    int i;

    /* 64-bit elements need the full 128-bit vector. */
    if (extract32(immh, 3, 1) && !is_q) {
        unallocated_encoding(s);
        return;
    }

    /* Cannot trigger when immh is a 4-bit field (size is then <= 3). */
    if (size > 3 && !is_q) {
        unallocated_encoding(s);
        return;
    }

    if (!fp_access_check(s)) {
        return;
    }

    switch (opcode) {
    case 0x02: /* SSRA / USRA (accumulate) */
        if (is_u) {
            /* Shift count same as element size produces zero to add.  */
            if (shift == 8 << size) {
                goto done;
            }
            gen_gvec_op2i(s, is_q, rd, rn, shift, &usra_op[size]);
        } else {
            /* Shift count same as element size produces all sign to add.  */
            if (shift == 8 << size) {
                shift -= 1;
            }
            gen_gvec_op2i(s, is_q, rd, rn, shift, &ssra_op[size]);
        }
        return;
    case 0x08: /* SRI */
        /* Shift count same as element size is valid but does nothing.  */
        if (shift == 8 << size) {
            goto done;
        }
        gen_gvec_op2i(s, is_q, rd, rn, shift, &sri_op[size]);
        return;

    case 0x00: /* SSHR / USHR */
        if (is_u) {
            if (shift == 8 << size) {
                /* Shift count the same size as element size produces zero.  */
                tcg_gen_gvec_dup8i(vec_full_reg_offset(s, rd),
                                   is_q ? 16 : 8, vec_full_reg_size(s), 0);
            } else {
                gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shri, size);
            }
        } else {
            /* Shift count the same size as element size produces all sign.  */
            if (shift == 8 << size) {
                shift -= 1;
            }
            gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_sari, size);
        }
        return;

    case 0x04: /* SRSHR / URSHR (rounding) */
        break;
    case 0x06: /* SRSRA / URSRA (accum + rounding) */
        accumulate = true;
        break;
    default:
        g_assert_not_reached();
    }

    /* Rounding forms: half of the weight of the last bit shifted out. */
    round_const = 1ULL << (shift - 1);
    tcg_round = tcg_const_i64(round_const);

    for (i = 0; i < elements; i++) {
        read_vec_element(s, tcg_rn, rn, i, memop);
        if (accumulate) {
            /* The accumulating form needs the old destination element. */
            read_vec_element(s, tcg_rd, rd, i, memop);
        }

        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
                                accumulate, is_u, size, shift);

        write_vec_element(s, tcg_rd, rd, i, size);
    }
    tcg_temp_free_i64(tcg_round);

    /* Also reached by the "nothing to do" USRA/SRI cases above. */
 done:
    clear_vec_high(s, is_q, rd);
}
9085
9086 static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9087 {
9088     uint64_t mask = dup_const(MO_8, 0xff << shift);
9089     TCGv_i64 t = tcg_temp_new_i64();
9090
9091     tcg_gen_shli_i64(t, a, shift);
9092     tcg_gen_andi_i64(t, t, mask);
9093     tcg_gen_andi_i64(d, d, ~mask);
9094     tcg_gen_or_i64(d, d, t);
9095     tcg_temp_free_i64(t);
9096 }
9097
9098 static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
9099 {
9100     uint64_t mask = dup_const(MO_16, 0xffff << shift);
9101     TCGv_i64 t = tcg_temp_new_i64();
9102
9103     tcg_gen_shli_i64(t, a, shift);
9104     tcg_gen_andi_i64(t, t, mask);
9105     tcg_gen_andi_i64(d, d, ~mask);
9106     tcg_gen_or_i64(d, d, t);
9107     tcg_temp_free_i64(t);
9108 }
9109
/* Shift-left-and-insert (SLI) for a single 32-bit element: deposit the
 * low (32 - shift) bits of @a into @d starting at bit @shift, leaving
 * the low @shift bits of @d unchanged.
 */
static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}
9114
/* Shift-left-and-insert (SLI) for a single 64-bit element: deposit the
 * low (64 - shift) bits of @a into @d starting at bit @shift, leaving
 * the low @shift bits of @d unchanged.
 */
static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}
9119
9120 static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
9121 {
9122     uint64_t mask = (1ull << sh) - 1;
9123     TCGv_vec t = tcg_temp_new_vec_matching(d);
9124     TCGv_vec m = tcg_temp_new_vec_matching(d);
9125
9126     tcg_gen_dupi_vec(vece, m, mask);
9127     tcg_gen_shli_vec(vece, t, a, sh);
9128     tcg_gen_and_vec(vece, d, d, m);
9129     tcg_gen_or_vec(vece, d, d, t);
9130
9131     tcg_temp_free_vec(t);
9132     tcg_temp_free_vec(m);
9133 }
9134
9135 /* SHL/SLI - Vector shift left */
9136 static void handle_vec_simd_shli(DisasContext *s, bool is_q, bool insert,
9137                                  int immh, int immb, int opcode, int rn, int rd)
9138 {
9139     static const GVecGen2i shi_op[4] = {
9140         { .fni8 = gen_shl8_ins_i64,
9141           .fniv = gen_shl_ins_vec,
9142           .opc = INDEX_op_shli_vec,
9143           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9144           .load_dest = true,
9145           .vece = MO_8 },
9146         { .fni8 = gen_shl16_ins_i64,
9147           .fniv = gen_shl_ins_vec,
9148           .opc = INDEX_op_shli_vec,
9149           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9150           .load_dest = true,
9151           .vece = MO_16 },
9152         { .fni4 = gen_shl32_ins_i32,
9153           .fniv = gen_shl_ins_vec,
9154           .opc = INDEX_op_shli_vec,
9155           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9156           .load_dest = true,
9157           .vece = MO_32 },
9158         { .fni8 = gen_shl64_ins_i64,
9159           .fniv = gen_shl_ins_vec,
9160           .opc = INDEX_op_shli_vec,
9161           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9162           .load_dest = true,
9163           .vece = MO_64 },
9164     };
9165     int size = 32 - clz32(immh) - 1;
9166     int immhb = immh << 3 | immb;
9167     int shift = immhb - (8 << size);
9168
9169     if (extract32(immh, 3, 1) && !is_q) {
9170         unallocated_encoding(s);
9171         return;
9172     }
9173
9174     if (size > 3 && !is_q) {
9175         unallocated_encoding(s);
9176         return;
9177     }
9178
9179     if (!fp_access_check(s)) {
9180         return;
9181     }
9182
9183     if (insert) {
9184         gen_gvec_op2i(s, is_q, rd, rn, shift, &shi_op[size]);
9185     } else {
9186         gen_gvec_fn2i(s, is_q, rd, rn, shift, tcg_gen_gvec_shli, size);
9187     }
9188 }
9189
9190 /* USHLL/SHLL - Vector shift left with widening */
9191 static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
9192                                  int immh, int immb, int opcode, int rn, int rd)
9193 {
9194     int size = 32 - clz32(immh) - 1;
9195     int immhb = immh << 3 | immb;
9196     int shift = immhb - (8 << size);
9197     int dsize = 64;
9198     int esize = 8 << size;
9199     int elements = dsize/esize;
9200     TCGv_i64 tcg_rn = new_tmp_a64(s);
9201     TCGv_i64 tcg_rd = new_tmp_a64(s);
9202     int i;
9203
9204     if (size >= 3) {
9205         unallocated_encoding(s);
9206         return;
9207     }
9208
9209     if (!fp_access_check(s)) {
9210         return;
9211     }
9212
9213     /* For the LL variants the store is larger than the load,
9214      * so if rd == rn we would overwrite parts of our input.
9215      * So load everything right now and use shifts in the main loop.
9216      */
9217     read_vec_element(s, tcg_rn, rn, is_q ? 1 : 0, MO_64);
9218
9219     for (i = 0; i < elements; i++) {
9220         tcg_gen_shri_i64(tcg_rd, tcg_rn, i * esize);
9221         ext_and_shift_reg(tcg_rd, tcg_rd, size | (!is_u << 2), 0);
9222         tcg_gen_shli_i64(tcg_rd, tcg_rd, shift);
9223         write_vec_element(s, tcg_rd, rd, i, size + 1);
9224     }
9225 }
9226
9227 /* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
9228 static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
9229                                  int immh, int immb, int opcode, int rn, int rd)
9230 {
9231     int immhb = immh << 3 | immb;
9232     int size = 32 - clz32(immh) - 1;
9233     int dsize = 64;
9234     int esize = 8 << size;
9235     int elements = dsize/esize;
9236     int shift = (2 * esize) - immhb;
9237     bool round = extract32(opcode, 0, 1);
9238     TCGv_i64 tcg_rn, tcg_rd, tcg_final;
9239     TCGv_i64 tcg_round;
9240     int i;
9241
9242     if (extract32(immh, 3, 1)) {
9243         unallocated_encoding(s);
9244         return;
9245     }
9246
9247     if (!fp_access_check(s)) {
9248         return;
9249     }
9250
9251     tcg_rn = tcg_temp_new_i64();
9252     tcg_rd = tcg_temp_new_i64();
9253     tcg_final = tcg_temp_new_i64();
9254     read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
9255
9256     if (round) {
9257         uint64_t round_const = 1ULL << (shift - 1);
9258         tcg_round = tcg_const_i64(round_const);
9259     } else {
9260         tcg_round = NULL;
9261     }
9262
9263     for (i = 0; i < elements; i++) {
9264         read_vec_element(s, tcg_rn, rn, i, size+1);
9265         handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
9266                                 false, true, size+1, shift);
9267
9268         tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
9269     }
9270
9271     if (!is_q) {
9272         write_vec_element(s, tcg_final, rd, 0, MO_64);
9273     } else {
9274         write_vec_element(s, tcg_final, rd, 1, MO_64);
9275     }
9276     if (round) {
9277         tcg_temp_free_i64(tcg_round);
9278     }
9279     tcg_temp_free_i64(tcg_rn);
9280     tcg_temp_free_i64(tcg_rd);
9281     tcg_temp_free_i64(tcg_final);
9282
9283     clear_vec_high(s, is_q, rd);
9284 }
9285
9286
9287 /* AdvSIMD shift by immediate
9288  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
9289  * +---+---+---+-------------+------+------+--------+---+------+------+
9290  * | 0 | Q | U | 0 1 1 1 1 0 | immh | immb | opcode | 1 |  Rn  |  Rd  |
9291  * +---+---+---+-------------+------+------+--------+---+------+------+
9292  */
9293 static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
9294 {
9295     int rd = extract32(insn, 0, 5);
9296     int rn = extract32(insn, 5, 5);
9297     int opcode = extract32(insn, 11, 5);
9298     int immb = extract32(insn, 16, 3);
9299     int immh = extract32(insn, 19, 4);
9300     bool is_u = extract32(insn, 29, 1);
9301     bool is_q = extract32(insn, 30, 1);
9302
9303     switch (opcode) {
9304     case 0x08: /* SRI */
9305         if (!is_u) {
9306             unallocated_encoding(s);
9307             return;
9308         }
9309         /* fall through */
9310     case 0x00: /* SSHR / USHR */
9311     case 0x02: /* SSRA / USRA (accumulate) */
9312     case 0x04: /* SRSHR / URSHR (rounding) */
9313     case 0x06: /* SRSRA / URSRA (accum + rounding) */
9314         handle_vec_simd_shri(s, is_q, is_u, immh, immb, opcode, rn, rd);
9315         break;
9316     case 0x0a: /* SHL / SLI */
9317         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
9318         break;
9319     case 0x10: /* SHRN */
9320     case 0x11: /* RSHRN / SQRSHRUN */
9321         if (is_u) {
9322             handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
9323                                    opcode, rn, rd);
9324         } else {
9325             handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
9326         }
9327         break;
9328     case 0x12: /* SQSHRN / UQSHRN */
9329     case 0x13: /* SQRSHRN / UQRSHRN */
9330         handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
9331                                opcode, rn, rd);
9332         break;
9333     case 0x14: /* SSHLL / USHLL */
9334         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
9335         break;
9336     case 0x1c: /* SCVTF / UCVTF */
9337         handle_simd_shift_intfp_conv(s, false, is_q, is_u, immh, immb,
9338                                      opcode, rn, rd);
9339         break;
9340     case 0xc: /* SQSHLU */
9341         if (!is_u) {
9342             unallocated_encoding(s);
9343             return;
9344         }
9345         handle_simd_qshl(s, false, is_q, false, true, immh, immb, rn, rd);
9346         break;
9347     case 0xe: /* SQSHL, UQSHL */
9348         handle_simd_qshl(s, false, is_q, is_u, is_u, immh, immb, rn, rd);
9349         break;
9350     case 0x1f: /* FCVTZS/ FCVTZU */
9351         handle_simd_shift_fpint_conv(s, false, is_q, is_u, immh, immb, rn, rd);
9352         return;
9353     default:
9354         unallocated_encoding(s);
9355         return;
9356     }
9357 }
9358
9359 /* Generate code to do a "long" addition or subtraction, ie one done in
9360  * TCGv_i64 on vector lanes twice the width specified by size.
9361  */
9362 static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
9363                           TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
9364 {
9365     static NeonGenTwo64OpFn * const fns[3][2] = {
9366         { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
9367         { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
9368         { tcg_gen_add_i64, tcg_gen_sub_i64 },
9369     };
9370     NeonGenTwo64OpFn *genfn;
9371     assert(size < 3);
9372
9373     genfn = fns[size][is_sub];
9374     genfn(tcg_res, tcg_op1, tcg_op2);
9375 }
9376
9377 static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
9378                                 int opcode, int rd, int rn, int rm)
9379 {
9380     /* 3-reg-different widening insns: 64 x 64 -> 128 */
9381     TCGv_i64 tcg_res[2];
9382     int pass, accop;
9383
9384     tcg_res[0] = tcg_temp_new_i64();
9385     tcg_res[1] = tcg_temp_new_i64();
9386
9387     /* Does this op do an adding accumulate, a subtracting accumulate,
9388      * or no accumulate at all?
9389      */
9390     switch (opcode) {
9391     case 5:
9392     case 8:
9393     case 9:
9394         accop = 1;
9395         break;
9396     case 10:
9397     case 11:
9398         accop = -1;
9399         break;
9400     default:
9401         accop = 0;
9402         break;
9403     }
9404
9405     if (accop != 0) {
9406         read_vec_element(s, tcg_res[0], rd, 0, MO_64);
9407         read_vec_element(s, tcg_res[1], rd, 1, MO_64);
9408     }
9409
9410     /* size == 2 means two 32x32->64 operations; this is worth special
9411      * casing because we can generally handle it inline.
9412      */
9413     if (size == 2) {
9414         for (pass = 0; pass < 2; pass++) {
9415             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9416             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9417             TCGv_i64 tcg_passres;
9418             TCGMemOp memop = MO_32 | (is_u ? 0 : MO_SIGN);
9419
9420             int elt = pass + is_q * 2;
9421
9422             read_vec_element(s, tcg_op1, rn, elt, memop);
9423             read_vec_element(s, tcg_op2, rm, elt, memop);
9424
9425             if (accop == 0) {
9426                 tcg_passres = tcg_res[pass];
9427             } else {
9428                 tcg_passres = tcg_temp_new_i64();
9429             }
9430
9431             switch (opcode) {
9432             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9433                 tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
9434                 break;
9435             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9436                 tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
9437                 break;
9438             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9439             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9440             {
9441                 TCGv_i64 tcg_tmp1 = tcg_temp_new_i64();
9442                 TCGv_i64 tcg_tmp2 = tcg_temp_new_i64();
9443
9444                 tcg_gen_sub_i64(tcg_tmp1, tcg_op1, tcg_op2);
9445                 tcg_gen_sub_i64(tcg_tmp2, tcg_op2, tcg_op1);
9446                 tcg_gen_movcond_i64(is_u ? TCG_COND_GEU : TCG_COND_GE,
9447                                     tcg_passres,
9448                                     tcg_op1, tcg_op2, tcg_tmp1, tcg_tmp2);
9449                 tcg_temp_free_i64(tcg_tmp1);
9450                 tcg_temp_free_i64(tcg_tmp2);
9451                 break;
9452             }
9453             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9454             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9455             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
9456                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
9457                 break;
9458             case 9: /* SQDMLAL, SQDMLAL2 */
9459             case 11: /* SQDMLSL, SQDMLSL2 */
9460             case 13: /* SQDMULL, SQDMULL2 */
9461                 tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
9462                 gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
9463                                                   tcg_passres, tcg_passres);
9464                 break;
9465             default:
9466                 g_assert_not_reached();
9467             }
9468
9469             if (opcode == 9 || opcode == 11) {
9470                 /* saturating accumulate ops */
9471                 if (accop < 0) {
9472                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
9473                 }
9474                 gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
9475                                                   tcg_res[pass], tcg_passres);
9476             } else if (accop > 0) {
9477                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
9478             } else if (accop < 0) {
9479                 tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
9480             }
9481
9482             if (accop != 0) {
9483                 tcg_temp_free_i64(tcg_passres);
9484             }
9485
9486             tcg_temp_free_i64(tcg_op1);
9487             tcg_temp_free_i64(tcg_op2);
9488         }
9489     } else {
9490         /* size 0 or 1, generally helper functions */
9491         for (pass = 0; pass < 2; pass++) {
9492             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
9493             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9494             TCGv_i64 tcg_passres;
9495             int elt = pass + is_q * 2;
9496
9497             read_vec_element_i32(s, tcg_op1, rn, elt, MO_32);
9498             read_vec_element_i32(s, tcg_op2, rm, elt, MO_32);
9499
9500             if (accop == 0) {
9501                 tcg_passres = tcg_res[pass];
9502             } else {
9503                 tcg_passres = tcg_temp_new_i64();
9504             }
9505
9506             switch (opcode) {
9507             case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9508             case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9509             {
9510                 TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
9511                 static NeonGenWidenFn * const widenfns[2][2] = {
9512                     { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
9513                     { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
9514                 };
9515                 NeonGenWidenFn *widenfn = widenfns[size][is_u];
9516
9517                 widenfn(tcg_op2_64, tcg_op2);
9518                 widenfn(tcg_passres, tcg_op1);
9519                 gen_neon_addl(size, (opcode == 2), tcg_passres,
9520                               tcg_passres, tcg_op2_64);
9521                 tcg_temp_free_i64(tcg_op2_64);
9522                 break;
9523             }
9524             case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9525             case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9526                 if (size == 0) {
9527                     if (is_u) {
9528                         gen_helper_neon_abdl_u16(tcg_passres, tcg_op1, tcg_op2);
9529                     } else {
9530                         gen_helper_neon_abdl_s16(tcg_passres, tcg_op1, tcg_op2);
9531                     }
9532                 } else {
9533                     if (is_u) {
9534                         gen_helper_neon_abdl_u32(tcg_passres, tcg_op1, tcg_op2);
9535                     } else {
9536                         gen_helper_neon_abdl_s32(tcg_passres, tcg_op1, tcg_op2);
9537                     }
9538                 }
9539                 break;
9540             case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9541             case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9542             case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
9543                 if (size == 0) {
9544                     if (is_u) {
9545                         gen_helper_neon_mull_u8(tcg_passres, tcg_op1, tcg_op2);
9546                     } else {
9547                         gen_helper_neon_mull_s8(tcg_passres, tcg_op1, tcg_op2);
9548                     }
9549                 } else {
9550                     if (is_u) {
9551                         gen_helper_neon_mull_u16(tcg_passres, tcg_op1, tcg_op2);
9552                     } else {
9553                         gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
9554                     }
9555                 }
9556                 break;
9557             case 9: /* SQDMLAL, SQDMLAL2 */
9558             case 11: /* SQDMLSL, SQDMLSL2 */
9559             case 13: /* SQDMULL, SQDMULL2 */
9560                 assert(size == 1);
9561                 gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
9562                 gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
9563                                                   tcg_passres, tcg_passres);
9564                 break;
9565             case 14: /* PMULL */
9566                 assert(size == 0);
9567                 gen_helper_neon_mull_p8(tcg_passres, tcg_op1, tcg_op2);
9568                 break;
9569             default:
9570                 g_assert_not_reached();
9571             }
9572             tcg_temp_free_i32(tcg_op1);
9573             tcg_temp_free_i32(tcg_op2);
9574
9575             if (accop != 0) {
9576                 if (opcode == 9 || opcode == 11) {
9577                     /* saturating accumulate ops */
9578                     if (accop < 0) {
9579                         gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
9580                     }
9581                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
9582                                                       tcg_res[pass],
9583                                                       tcg_passres);
9584                 } else {
9585                     gen_neon_addl(size, (accop < 0), tcg_res[pass],
9586                                   tcg_res[pass], tcg_passres);
9587                 }
9588                 tcg_temp_free_i64(tcg_passres);
9589             }
9590         }
9591     }
9592
9593     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
9594     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
9595     tcg_temp_free_i64(tcg_res[0]);
9596     tcg_temp_free_i64(tcg_res[1]);
9597 }
9598
9599 static void handle_3rd_wide(DisasContext *s, int is_q, int is_u, int size,
9600                             int opcode, int rd, int rn, int rm)
9601 {
9602     TCGv_i64 tcg_res[2];
9603     int part = is_q ? 2 : 0;
9604     int pass;
9605
9606     for (pass = 0; pass < 2; pass++) {
9607         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9608         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
9609         TCGv_i64 tcg_op2_wide = tcg_temp_new_i64();
9610         static NeonGenWidenFn * const widenfns[3][2] = {
9611             { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
9612             { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
9613             { tcg_gen_ext_i32_i64, tcg_gen_extu_i32_i64 },
9614         };
9615         NeonGenWidenFn *widenfn = widenfns[size][is_u];
9616
9617         read_vec_element(s, tcg_op1, rn, pass, MO_64);
9618         read_vec_element_i32(s, tcg_op2, rm, part + pass, MO_32);
9619         widenfn(tcg_op2_wide, tcg_op2);
9620         tcg_temp_free_i32(tcg_op2);
9621         tcg_res[pass] = tcg_temp_new_i64();
9622         gen_neon_addl(size, (opcode == 3),
9623                       tcg_res[pass], tcg_op1, tcg_op2_wide);
9624         tcg_temp_free_i64(tcg_op1);
9625         tcg_temp_free_i64(tcg_op2_wide);
9626     }
9627
9628     for (pass = 0; pass < 2; pass++) {
9629         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
9630         tcg_temp_free_i64(tcg_res[pass]);
9631     }
9632 }
9633
/* Rounding narrow of a 64-bit value to its high 32 bits: add 1 << 31
 * (half of the weight of the lowest retained bit) and then extract
 * bits [63:32] into @res.
 * Note that @in is modified by the addition.
 */
static void do_narrow_round_high_u32(TCGv_i32 res, TCGv_i64 in)
{
    tcg_gen_addi_i64(in, in, 1U << 31);
    tcg_gen_extrh_i64_i32(res, in);
}
9639
9640 static void handle_3rd_narrowing(DisasContext *s, int is_q, int is_u, int size,
9641                                  int opcode, int rd, int rn, int rm)
9642 {
9643     TCGv_i32 tcg_res[2];
9644     int part = is_q ? 2 : 0;
9645     int pass;
9646
9647     for (pass = 0; pass < 2; pass++) {
9648         TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9649         TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9650         TCGv_i64 tcg_wideres = tcg_temp_new_i64();
9651         static NeonGenNarrowFn * const narrowfns[3][2] = {
9652             { gen_helper_neon_narrow_high_u8,
9653               gen_helper_neon_narrow_round_high_u8 },
9654             { gen_helper_neon_narrow_high_u16,
9655               gen_helper_neon_narrow_round_high_u16 },
9656             { tcg_gen_extrh_i64_i32, do_narrow_round_high_u32 },
9657         };
9658         NeonGenNarrowFn *gennarrow = narrowfns[size][is_u];
9659
9660         read_vec_element(s, tcg_op1, rn, pass, MO_64);
9661         read_vec_element(s, tcg_op2, rm, pass, MO_64);
9662
9663         gen_neon_addl(size, (opcode == 6), tcg_wideres, tcg_op1, tcg_op2);
9664
9665         tcg_temp_free_i64(tcg_op1);
9666         tcg_temp_free_i64(tcg_op2);
9667
9668         tcg_res[pass] = tcg_temp_new_i32();
9669         gennarrow(tcg_res[pass], tcg_wideres);
9670         tcg_temp_free_i64(tcg_wideres);
9671     }
9672
9673     for (pass = 0; pass < 2; pass++) {
9674         write_vec_element_i32(s, tcg_res[pass], rd, pass + part, MO_32);
9675         tcg_temp_free_i32(tcg_res[pass]);
9676     }
9677     clear_vec_high(s, is_q, rd);
9678 }
9679
9680 static void handle_pmull_64(DisasContext *s, int is_q, int rd, int rn, int rm)
9681 {
9682     /* PMULL of 64 x 64 -> 128 is an odd special case because it
9683      * is the only three-reg-diff instruction which produces a
9684      * 128-bit wide result from a single operation. However since
9685      * it's possible to calculate the two halves more or less
9686      * separately we just use two helper calls.
9687      */
9688     TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9689     TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9690     TCGv_i64 tcg_res = tcg_temp_new_i64();
9691
9692     read_vec_element(s, tcg_op1, rn, is_q, MO_64);
9693     read_vec_element(s, tcg_op2, rm, is_q, MO_64);
9694     gen_helper_neon_pmull_64_lo(tcg_res, tcg_op1, tcg_op2);
9695     write_vec_element(s, tcg_res, rd, 0, MO_64);
9696     gen_helper_neon_pmull_64_hi(tcg_res, tcg_op1, tcg_op2);
9697     write_vec_element(s, tcg_res, rd, 1, MO_64);
9698
9699     tcg_temp_free_i64(tcg_op1);
9700     tcg_temp_free_i64(tcg_op2);
9701     tcg_temp_free_i64(tcg_res);
9702 }
9703
9704 /* AdvSIMD three different
9705  *   31  30  29 28       24 23  22  21 20  16 15    12 11 10 9    5 4    0
9706  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
9707  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 0 0 |  Rn  |  Rd  |
9708  * +---+---+---+-----------+------+---+------+--------+-----+------+------+
9709  */
9710 static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
9711 {
9712     /* Instructions in this group fall into three basic classes
9713      * (in each case with the operation working on each element in
9714      * the input vectors):
9715      * (1) widening 64 x 64 -> 128 (with possibly Vd as an extra
9716      *     128 bit input)
9717      * (2) wide 64 x 128 -> 128
9718      * (3) narrowing 128 x 128 -> 64
9719      * Here we do initial decode, catch unallocated cases and
9720      * dispatch to separate functions for each class.
9721      */
9722     int is_q = extract32(insn, 30, 1);
9723     int is_u = extract32(insn, 29, 1);
9724     int size = extract32(insn, 22, 2);
9725     int opcode = extract32(insn, 12, 4);
9726     int rm = extract32(insn, 16, 5);
9727     int rn = extract32(insn, 5, 5);
9728     int rd = extract32(insn, 0, 5);
9729
9730     switch (opcode) {
9731     case 1: /* SADDW, SADDW2, UADDW, UADDW2 */
9732     case 3: /* SSUBW, SSUBW2, USUBW, USUBW2 */
9733         /* 64 x 128 -> 128 */
9734         if (size == 3) {
9735             unallocated_encoding(s);
9736             return;
9737         }
9738         if (!fp_access_check(s)) {
9739             return;
9740         }
9741         handle_3rd_wide(s, is_q, is_u, size, opcode, rd, rn, rm);
9742         break;
9743     case 4: /* ADDHN, ADDHN2, RADDHN, RADDHN2 */
9744     case 6: /* SUBHN, SUBHN2, RSUBHN, RSUBHN2 */
9745         /* 128 x 128 -> 64 */
9746         if (size == 3) {
9747             unallocated_encoding(s);
9748             return;
9749         }
9750         if (!fp_access_check(s)) {
9751             return;
9752         }
9753         handle_3rd_narrowing(s, is_q, is_u, size, opcode, rd, rn, rm);
9754         break;
9755     case 14: /* PMULL, PMULL2 */
9756         if (is_u || size == 1 || size == 2) {
9757             unallocated_encoding(s);
9758             return;
9759         }
9760         if (size == 3) {
9761             if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
9762                 unallocated_encoding(s);
9763                 return;
9764             }
9765             if (!fp_access_check(s)) {
9766                 return;
9767             }
9768             handle_pmull_64(s, is_q, rd, rn, rm);
9769             return;
9770         }
9771         goto is_widening;
9772     case 9: /* SQDMLAL, SQDMLAL2 */
9773     case 11: /* SQDMLSL, SQDMLSL2 */
9774     case 13: /* SQDMULL, SQDMULL2 */
9775         if (is_u || size == 0) {
9776             unallocated_encoding(s);
9777             return;
9778         }
9779         /* fall through */
9780     case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
9781     case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
9782     case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
9783     case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
9784     case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
9785     case 10: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
9786     case 12: /* SMULL, SMULL2, UMULL, UMULL2 */
9787         /* 64 x 64 -> 128 */
9788         if (size == 3) {
9789             unallocated_encoding(s);
9790             return;
9791         }
9792     is_widening:
9793         if (!fp_access_check(s)) {
9794             return;
9795         }
9796
9797         handle_3rd_widening(s, is_q, is_u, size, opcode, rd, rn, rm);
9798         break;
9799     default:
9800         /* opcode 15 not allocated */
9801         unallocated_encoding(s);
9802         break;
9803     }
9804 }
9805
/* Bitwise select (BSL) on a 64-bit value: rd = (rn & rd) | (rm & ~rd).
 *
 * Evaluated as rd = rm ^ ((rn ^ rm) & rd), which needs no extra
 * temporary; the price is that rn is overwritten with the intermediate
 * value.  Used as the .fni8 expander of bsl_op.
 */
9806 static void gen_bsl_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
9807 {
9808     tcg_gen_xor_i64(rn, rn, rm);    /* rn = rn ^ rm */
9809     tcg_gen_and_i64(rn, rn, rd);    /* rn = (rn ^ rm) & rd */
9810     tcg_gen_xor_i64(rd, rm, rn);    /* rd = rm ^ rn */
9811 }
9812
/* Bitwise insert if true (BIT) on a 64-bit value:
 * rd = (rn & rm) | (rd & ~rm).
 *
 * Evaluated as rd = rd ^ ((rn ^ rd) & rm); rn is overwritten with the
 * intermediate value.  Used as the .fni8 expander of bit_op.
 */
9813 static void gen_bit_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
9814 {
9815     tcg_gen_xor_i64(rn, rn, rd);    /* rn = rn ^ rd */
9816     tcg_gen_and_i64(rn, rn, rm);    /* rn = (rn ^ rd) & rm */
9817     tcg_gen_xor_i64(rd, rd, rn);    /* rd = rd ^ rn */
9818 }
9819
/* Bitwise insert if false (BIF) on a 64-bit value:
 * rd = (rn & ~rm) | (rd & rm).
 *
 * Same shape as gen_bit_i64 but the mask is applied with andc, i.e.
 * (per the TCG op definitions) and-with-complement of the last operand.
 * rn is overwritten with the intermediate value.  Used as the .fni8
 * expander of bif_op.
 */
9820 static void gen_bif_i64(TCGv_i64 rd, TCGv_i64 rn, TCGv_i64 rm)
9821 {
9822     tcg_gen_xor_i64(rn, rn, rd);    /* rn = rn ^ rd */
9823     tcg_gen_andc_i64(rn, rn, rm);   /* rn = (rn ^ rd) & ~rm */
9824     tcg_gen_xor_i64(rd, rd, rn);    /* rd = rd ^ rn */
9825 }
9826
/* Host-vector form of BSL: rd = (rn & rd) | (rm & ~rd).
 *
 * Same xor/and/xor sequence as gen_bsl_i64, with vece passed through to
 * each vector op.  rn is overwritten with the intermediate value.  Used
 * as the .fniv expander of bsl_op.
 */
9827 static void gen_bsl_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
9828 {
9829     tcg_gen_xor_vec(vece, rn, rn, rm);  /* rn = rn ^ rm */
9830     tcg_gen_and_vec(vece, rn, rn, rd);  /* rn = (rn ^ rm) & rd */
9831     tcg_gen_xor_vec(vece, rd, rm, rn);  /* rd = rm ^ rn */
9832 }
9833
/* Host-vector form of BIT: rd = (rn & rm) | (rd & ~rm).
 *
 * Same xor/and/xor sequence as gen_bit_i64, with vece passed through to
 * each vector op.  rn is overwritten with the intermediate value.  Used
 * as the .fniv expander of bit_op.
 */
9834 static void gen_bit_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
9835 {
9836     tcg_gen_xor_vec(vece, rn, rn, rd);  /* rn = rn ^ rd */
9837     tcg_gen_and_vec(vece, rn, rn, rm);  /* rn = (rn ^ rd) & rm */
9838     tcg_gen_xor_vec(vece, rd, rd, rn);  /* rd = rd ^ rn */
9839 }
9840
/* Host-vector form of BIF: rd = (rn & ~rm) | (rd & rm).
 *
 * Same xor/andc/xor sequence as gen_bif_i64, with vece passed through
 * to each vector op.  rn is overwritten with the intermediate value.
 * Used as the .fniv expander of bif_op.
 */
9841 static void gen_bif_vec(unsigned vece, TCGv_vec rd, TCGv_vec rn, TCGv_vec rm)
9842 {
9843     tcg_gen_xor_vec(vece, rn, rn, rd);  /* rn = rn ^ rd */
9844     tcg_gen_andc_vec(vece, rn, rn, rm); /* rn = (rn ^ rd) & ~rm */
9845     tcg_gen_xor_vec(vece, rd, rd, rn);  /* rd = rd ^ rn */
9846 }
9847
9848 /* Logic op (opcode == 3) subgroup of C3.6.16. */
9849 static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
9850 {
9851     static const GVecGen3 bsl_op = {
9852         .fni8 = gen_bsl_i64,
9853         .fniv = gen_bsl_vec,
9854         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9855         .load_dest = true
9856     };
9857     static const GVecGen3 bit_op = {
9858         .fni8 = gen_bit_i64,
9859         .fniv = gen_bit_vec,
9860         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9861         .load_dest = true
9862     };
9863     static const GVecGen3 bif_op = {
9864         .fni8 = gen_bif_i64,
9865         .fniv = gen_bif_vec,
9866         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
9867         .load_dest = true
9868     };
9869
9870     int rd = extract32(insn, 0, 5);
9871     int rn = extract32(insn, 5, 5);
9872     int rm = extract32(insn, 16, 5);
9873     int size = extract32(insn, 22, 2);
9874     bool is_u = extract32(insn, 29, 1);
9875     bool is_q = extract32(insn, 30, 1);
9876
9877     if (!fp_access_check(s)) {
9878         return;
9879     }
9880
9881     switch (size + 4 * is_u) {
9882     case 0: /* AND */
9883         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_and, 0);
9884         return;
9885     case 1: /* BIC */
9886         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_andc, 0);
9887         return;
9888     case 2: /* ORR */
9889         if (rn == rm) { /* MOV */
9890             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_mov, 0);
9891         } else {
9892             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_or, 0);
9893         }
9894         return;
9895     case 3: /* ORN */
9896         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_orc, 0);
9897         return;
9898     case 4: /* EOR */
9899         gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_xor, 0);
9900         return;
9901
9902     case 5: /* BSL bitwise select */
9903         gen_gvec_op3(s, is_q, rd, rn, rm, &bsl_op);
9904         return;
9905     case 6: /* BIT, bitwise insert if true */
9906         gen_gvec_op3(s, is_q, rd, rn, rm, &bit_op);
9907         return;
9908     case 7: /* BIF, bitwise insert if false */
9909         gen_gvec_op3(s, is_q, rd, rn, rm, &bif_op);
9910         return;
9911
9912     default:
9913         g_assert_not_reached();
9914     }
9915 }
9916
9917 /* Helper functions for 32 bit comparisons */
/* Signed 32-bit maximum: res = (op1 >= op2) ? op1 : op2.
 *
 * Emitted as a single movcond; op1 and op2 serve both as the compared
 * values and as the two candidate results.
 */
9918 static void gen_max_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9919 {
9920     tcg_gen_movcond_i32(TCG_COND_GE, res, op1, op2, op1, op2);
9921 }
9922
/* Unsigned 32-bit maximum: res = (op1 >= op2) ? op1 : op2, with the
 * comparison done unsigned (TCG_COND_GEU).
 */
9923 static void gen_max_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9924 {
9925     tcg_gen_movcond_i32(TCG_COND_GEU, res, op1, op2, op1, op2);
9926 }
9927
/* Signed 32-bit minimum: res = (op1 <= op2) ? op1 : op2.
 *
 * Emitted as a single movcond; op1 and op2 serve both as the compared
 * values and as the two candidate results.
 */
9928 static void gen_min_s32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9929 {
9930     tcg_gen_movcond_i32(TCG_COND_LE, res, op1, op2, op1, op2);
9931 }
9932
/* Unsigned 32-bit minimum: res = (op1 <= op2) ? op1 : op2, with the
 * comparison done unsigned (TCG_COND_LEU).
 */
9933 static void gen_min_u32(TCGv_i32 res, TCGv_i32 op1, TCGv_i32 op2)
9934 {
9935     tcg_gen_movcond_i32(TCG_COND_LEU, res, op1, op2, op1, op2);
9936 }
9937
9938 /* Pairwise op subgroup of C3.6.16.
9939  *
9940  * This is called directly or via the handle_3same_float for float pairwise
9941  * operations where the opcode and size are calculated differently.
9942  */
9943 static void handle_simd_3same_pair(DisasContext *s, int is_q, int u, int opcode,
9944                                    int size, int rn, int rm, int rd)
9945 {
9946     TCGv_ptr fpst;
9947     int pass;
9948
9949     /* Floating point operations need fpst */
9950     if (opcode >= 0x58) {
9951         fpst = get_fpstatus_ptr(false);
9952     } else {
9953         fpst = NULL;
9954     }
9955
9956     if (!fp_access_check(s)) {
9957         return;
9958     }
9959
9960     /* These operations work on the concatenated rm:rn, with each pair of
9961      * adjacent elements being operated on to produce an element in the result.
9962      */
9963     if (size == 3) {
9964         TCGv_i64 tcg_res[2];
9965
9966         for (pass = 0; pass < 2; pass++) {
9967             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
9968             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
9969             int passreg = (pass == 0) ? rn : rm;
9970
9971             read_vec_element(s, tcg_op1, passreg, 0, MO_64);
9972             read_vec_element(s, tcg_op2, passreg, 1, MO_64);
9973             tcg_res[pass] = tcg_temp_new_i64();
9974
9975             switch (opcode) {
9976             case 0x17: /* ADDP */
9977                 tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
9978                 break;
9979             case 0x58: /* FMAXNMP */
9980                 gen_helper_vfp_maxnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9981                 break;
9982             case 0x5a: /* FADDP */
9983                 gen_helper_vfp_addd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9984                 break;
9985             case 0x5e: /* FMAXP */
9986                 gen_helper_vfp_maxd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9987                 break;
9988             case 0x78: /* FMINNMP */
9989                 gen_helper_vfp_minnumd(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9990                 break;
9991             case 0x7e: /* FMINP */
9992                 gen_helper_vfp_mind(tcg_res[pass], tcg_op1, tcg_op2, fpst);
9993                 break;
9994             default:
9995                 g_assert_not_reached();
9996             }
9997
9998             tcg_temp_free_i64(tcg_op1);
9999             tcg_temp_free_i64(tcg_op2);
10000         }
10001
10002         for (pass = 0; pass < 2; pass++) {
10003             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10004             tcg_temp_free_i64(tcg_res[pass]);
10005         }
10006     } else {
10007         int maxpass = is_q ? 4 : 2;
10008         TCGv_i32 tcg_res[4];
10009
10010         for (pass = 0; pass < maxpass; pass++) {
10011             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10012             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10013             NeonGenTwoOpFn *genfn = NULL;
10014             int passreg = pass < (maxpass / 2) ? rn : rm;
10015             int passelt = (is_q && (pass & 1)) ? 2 : 0;
10016
10017             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_32);
10018             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_32);
10019             tcg_res[pass] = tcg_temp_new_i32();
10020
10021             switch (opcode) {
10022             case 0x17: /* ADDP */
10023             {
10024                 static NeonGenTwoOpFn * const fns[3] = {
10025                     gen_helper_neon_padd_u8,
10026                     gen_helper_neon_padd_u16,
10027                     tcg_gen_add_i32,
10028                 };
10029                 genfn = fns[size];
10030                 break;
10031             }
10032             case 0x14: /* SMAXP, UMAXP */
10033             {
10034                 static NeonGenTwoOpFn * const fns[3][2] = {
10035                     { gen_helper_neon_pmax_s8, gen_helper_neon_pmax_u8 },
10036                     { gen_helper_neon_pmax_s16, gen_helper_neon_pmax_u16 },
10037                     { gen_max_s32, gen_max_u32 },
10038                 };
10039                 genfn = fns[size][u];
10040                 break;
10041             }
10042             case 0x15: /* SMINP, UMINP */
10043             {
10044                 static NeonGenTwoOpFn * const fns[3][2] = {
10045                     { gen_helper_neon_pmin_s8, gen_helper_neon_pmin_u8 },
10046                     { gen_helper_neon_pmin_s16, gen_helper_neon_pmin_u16 },
10047                     { gen_min_s32, gen_min_u32 },
10048                 };
10049                 genfn = fns[size][u];
10050                 break;
10051             }
10052             /* The FP operations are all on single floats (32 bit) */
10053             case 0x58: /* FMAXNMP */
10054                 gen_helper_vfp_maxnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10055                 break;
10056             case 0x5a: /* FADDP */
10057                 gen_helper_vfp_adds(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10058                 break;
10059             case 0x5e: /* FMAXP */
10060                 gen_helper_vfp_maxs(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10061                 break;
10062             case 0x78: /* FMINNMP */
10063                 gen_helper_vfp_minnums(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10064                 break;
10065             case 0x7e: /* FMINP */
10066                 gen_helper_vfp_mins(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10067                 break;
10068             default:
10069                 g_assert_not_reached();
10070             }
10071
10072             /* FP ops called directly, otherwise call now */
10073             if (genfn) {
10074                 genfn(tcg_res[pass], tcg_op1, tcg_op2);
10075             }
10076
10077             tcg_temp_free_i32(tcg_op1);
10078             tcg_temp_free_i32(tcg_op2);
10079         }
10080
10081         for (pass = 0; pass < maxpass; pass++) {
10082             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10083             tcg_temp_free_i32(tcg_res[pass]);
10084         }
10085         clear_vec_high(s, is_q, rd);
10086     }
10087
10088     if (fpst) {
10089         tcg_temp_free_ptr(fpst);
10090     }
10091 }
10092
10093 /* Floating point op subgroup of C3.6.16. */
10094 static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
10095 {
10096     /* For floating point ops, the U, size[1] and opcode bits
10097      * together indicate the operation. size[0] indicates single
10098      * or double.
10099      */
10100     int fpopcode = extract32(insn, 11, 5)
10101         | (extract32(insn, 23, 1) << 5)
10102         | (extract32(insn, 29, 1) << 6);
10103     int is_q = extract32(insn, 30, 1);
10104     int size = extract32(insn, 22, 1);
10105     int rm = extract32(insn, 16, 5);
10106     int rn = extract32(insn, 5, 5);
10107     int rd = extract32(insn, 0, 5);
10108
10109     int datasize = is_q ? 128 : 64;
10110     int esize = 32 << size;
10111     int elements = datasize / esize;
10112
10113     if (size == 1 && !is_q) {
10114         unallocated_encoding(s);
10115         return;
10116     }
10117
10118     switch (fpopcode) {
10119     case 0x58: /* FMAXNMP */
10120     case 0x5a: /* FADDP */
10121     case 0x5e: /* FMAXP */
10122     case 0x78: /* FMINNMP */
10123     case 0x7e: /* FMINP */
10124         if (size && !is_q) {
10125             unallocated_encoding(s);
10126             return;
10127         }
10128         handle_simd_3same_pair(s, is_q, 0, fpopcode, size ? MO_64 : MO_32,
10129                                rn, rm, rd);
10130         return;
10131     case 0x1b: /* FMULX */
10132     case 0x1f: /* FRECPS */
10133     case 0x3f: /* FRSQRTS */
10134     case 0x5d: /* FACGE */
10135     case 0x7d: /* FACGT */
10136     case 0x19: /* FMLA */
10137     case 0x39: /* FMLS */
10138     case 0x18: /* FMAXNM */
10139     case 0x1a: /* FADD */
10140     case 0x1c: /* FCMEQ */
10141     case 0x1e: /* FMAX */
10142     case 0x38: /* FMINNM */
10143     case 0x3a: /* FSUB */
10144     case 0x3e: /* FMIN */
10145     case 0x5b: /* FMUL */
10146     case 0x5c: /* FCMGE */
10147     case 0x5f: /* FDIV */
10148     case 0x7a: /* FABD */
10149     case 0x7c: /* FCMGT */
10150         if (!fp_access_check(s)) {
10151             return;
10152         }
10153
10154         handle_3same_float(s, size, elements, fpopcode, rd, rn, rm);
10155         return;
10156     default:
10157         unallocated_encoding(s);
10158         return;
10159     }
10160 }
10161
/* Multiply-accumulate expander for MO_8 (the .fni4 entry of mla_op[0]):
 * d = d + a * b, using the neon u8 mul/add helpers on 32-bit values
 * (presumably four byte lanes per value -- confirm against the helpers).
 * a is overwritten with the product.
 */
10162 static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10163 {
10164     gen_helper_neon_mul_u8(a, a, b);
10165     gen_helper_neon_add_u8(d, d, a);
10166 }
10167
/* Multiply-accumulate expander for MO_16 (the .fni4 entry of mla_op[1]):
 * d = d + a * b, using the neon u16 mul/add helpers on 32-bit values
 * (presumably two halfword lanes per value -- confirm against the
 * helpers).  a is overwritten with the product.
 */
10168 static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10169 {
10170     gen_helper_neon_mul_u16(a, a, b);
10171     gen_helper_neon_add_u16(d, d, a);
10172 }
10173
/* Multiply-accumulate expander for MO_32 (the .fni4 entry of mla_op[2]):
 * d = d + a * b on 32-bit values.  a is overwritten with the product.
 */
10174 static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10175 {
10176     tcg_gen_mul_i32(a, a, b);
10177     tcg_gen_add_i32(d, d, a);
10178 }
10179
/* Multiply-accumulate expander for MO_64 (the .fni8 entry of mla_op[3]):
 * d = d + a * b on 64-bit values.  a is overwritten with the product.
 */
10180 static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
10181 {
10182     tcg_gen_mul_i64(a, a, b);
10183     tcg_gen_add_i64(d, d, a);
10184 }
10185
/* Host-vector multiply-accumulate (the .fniv entry of every mla_op[]
 * element): d = d + a * b, with vece passed through to both vector ops.
 * a is overwritten with the product.
 */
10186 static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
10187 {
10188     tcg_gen_mul_vec(vece, a, a, b);
10189     tcg_gen_add_vec(vece, d, d, a);
10190 }
10191
/* Multiply-subtract expander for MO_8 (the .fni4 entry of mls_op[0]):
 * d = d - a * b, using the neon u8 mul/sub helpers on 32-bit values
 * (presumably four byte lanes per value -- confirm against the helpers).
 * a is overwritten with the product.
 */
10192 static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10193 {
10194     gen_helper_neon_mul_u8(a, a, b);
10195     gen_helper_neon_sub_u8(d, d, a);
10196 }
10197
/* Multiply-subtract expander for MO_16 (the .fni4 entry of mls_op[1]):
 * d = d - a * b, using the neon u16 mul/sub helpers on 32-bit values
 * (presumably two halfword lanes per value -- confirm against the
 * helpers).  a is overwritten with the product.
 */
10198 static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10199 {
10200     gen_helper_neon_mul_u16(a, a, b);
10201     gen_helper_neon_sub_u16(d, d, a);
10202 }
10203
/* Multiply-subtract expander for MO_32 (the .fni4 entry of mls_op[2]):
 * d = d - a * b on 32-bit values.  a is overwritten with the product.
 */
10204 static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
10205 {
10206     tcg_gen_mul_i32(a, a, b);
10207     tcg_gen_sub_i32(d, d, a);
10208 }
10209
/* Multiply-subtract expander for MO_64 (the .fni8 entry of mls_op[3]):
 * d = d - a * b on 64-bit values.  a is overwritten with the product.
 */
10210 static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
10211 {
10212     tcg_gen_mul_i64(a, a, b);
10213     tcg_gen_sub_i64(d, d, a);
10214 }
10215
/* Host-vector multiply-subtract (the .fniv entry of every mls_op[]
 * element): d = d - a * b, with vece passed through to both vector ops.
 * a is overwritten with the product.
 */
10216 static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
10217 {
10218     tcg_gen_mul_vec(vece, a, a, b);
10219     tcg_gen_sub_vec(vece, d, d, a);
10220 }
10221
10222 /* Integer op subgroup of C3.6.16.
 *
 * Translation proceeds in four steps:
 *  1. reject opcode/size/U combinations that are unallocated;
 *  2. perform the FP/SIMD access check;
 *  3. emit ADD/SUB, MUL, MLA/MLS, CMTST and the integer compares
 *     through gvec expanders and return;
 *  4. emit everything else one element (size == 3) or one 32-bit
 *     chunk (size < 3) at a time, then call clear_vec_high().
 */
10223 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
10224 {
      /* gvec descriptors, each table indexed by the size field (MO_8..MO_64) */
10225     static const GVecGen3 cmtst_op[4] = {
10226         { .fni4 = gen_helper_neon_tst_u8,
10227           .fniv = gen_cmtst_vec,
10228           .vece = MO_8 },
10229         { .fni4 = gen_helper_neon_tst_u16,
10230           .fniv = gen_cmtst_vec,
10231           .vece = MO_16 },
10232         { .fni4 = gen_cmtst_i32,
10233           .fniv = gen_cmtst_vec,
10234           .vece = MO_32 },
10235         { .fni8 = gen_cmtst_i64,
10236           .fniv = gen_cmtst_vec,
10237           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10238           .vece = MO_64 },
10239     };
      /* MLA and MLS accumulate into Rd, hence load_dest in every entry */
10240     static const GVecGen3 mla_op[4] = {
10241         { .fni4 = gen_mla8_i32,
10242           .fniv = gen_mla_vec,
10243           .opc = INDEX_op_mul_vec,
10244           .load_dest = true,
10245           .vece = MO_8 },
10246         { .fni4 = gen_mla16_i32,
10247           .fniv = gen_mla_vec,
10248           .opc = INDEX_op_mul_vec,
10249           .load_dest = true,
10250           .vece = MO_16 },
10251         { .fni4 = gen_mla32_i32,
10252           .fniv = gen_mla_vec,
10253           .opc = INDEX_op_mul_vec,
10254           .load_dest = true,
10255           .vece = MO_32 },
10256         { .fni8 = gen_mla64_i64,
10257           .fniv = gen_mla_vec,
10258           .opc = INDEX_op_mul_vec,
10259           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10260           .load_dest = true,
10261           .vece = MO_64 },
10262     };
10263     static const GVecGen3 mls_op[4] = {
10264         { .fni4 = gen_mls8_i32,
10265           .fniv = gen_mls_vec,
10266           .opc = INDEX_op_mul_vec,
10267           .load_dest = true,
10268           .vece = MO_8 },
10269         { .fni4 = gen_mls16_i32,
10270           .fniv = gen_mls_vec,
10271           .opc = INDEX_op_mul_vec,
10272           .load_dest = true,
10273           .vece = MO_16 },
10274         { .fni4 = gen_mls32_i32,
10275           .fniv = gen_mls_vec,
10276           .opc = INDEX_op_mul_vec,
10277           .load_dest = true,
10278           .vece = MO_32 },
10279         { .fni8 = gen_mls64_i64,
10280           .fniv = gen_mls_vec,
10281           .opc = INDEX_op_mul_vec,
10282           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
10283           .load_dest = true,
10284           .vece = MO_64 },
10285     };
10286
10287     int is_q = extract32(insn, 30, 1);
10288     int u = extract32(insn, 29, 1);
10289     int size = extract32(insn, 22, 2);
10290     int opcode = extract32(insn, 11, 5);
10291     int rm = extract32(insn, 16, 5);
10292     int rn = extract32(insn, 5, 5);
10293     int rd = extract32(insn, 0, 5);
10294     int pass;
10295     TCGCond cond;
10296
      /* Step 1: reject unallocated encodings before the access check */
10297     switch (opcode) {
10298     case 0x13: /* MUL, PMUL */
          /* PMUL (u set) exists for byte elements only */
10299         if (u && size != 0) {
10300             unallocated_encoding(s);
10301             return;
10302         }
10303         /* fall through */
10304     case 0x0: /* SHADD, UHADD */
10305     case 0x2: /* SRHADD, URHADD */
10306     case 0x4: /* SHSUB, UHSUB */
10307     case 0xc: /* SMAX, UMAX */
10308     case 0xd: /* SMIN, UMIN */
10309     case 0xe: /* SABD, UABD */
10310     case 0xf: /* SABA, UABA */
10311     case 0x12: /* MLA, MLS */
          /* none of these has a 64-bit element form */
10312         if (size == 3) {
10313             unallocated_encoding(s);
10314             return;
10315         }
10316         break;
10317     case 0x16: /* SQDMULH, SQRDMULH */
10318         if (size == 0 || size == 3) {
10319             unallocated_encoding(s);
10320             return;
10321         }
10322         break;
10323     default:
          /* every other opcode: 64-bit elements need the 128-bit (Q) form */
10324         if (size == 3 && !is_q) {
10325             unallocated_encoding(s);
10326             return;
10327         }
10328         break;
10329     }
10330
10331     if (!fp_access_check(s)) {
10332         return;
10333     }
10334
      /* Step 3: operations that have a gvec expansion return from inside
       * this switch.  PMUL breaks out of it, and opcodes with no case
       * here skip it; both continue with the per-pass code below.
       */
10335     switch (opcode) {
10336     case 0x10: /* ADD, SUB */
10337         if (u) {
10338             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_sub, size);
10339         } else {
10340             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_add, size);
10341         }
10342         return;
10343     case 0x13: /* MUL, PMUL */
10344         if (!u) { /* MUL */
10345             gen_gvec_fn3(s, is_q, rd, rn, rm, tcg_gen_gvec_mul, size);
10346             return;
10347         }
10348         break;
10349     case 0x12: /* MLA, MLS */
10350         if (u) {
10351             gen_gvec_op3(s, is_q, rd, rn, rm, &mls_op[size]);
10352         } else {
10353             gen_gvec_op3(s, is_q, rd, rn, rm, &mla_op[size]);
10354         }
10355         return;
10356     case 0x11:
10357         if (!u) { /* CMTST */
10358             gen_gvec_op3(s, is_q, rd, rn, rm, &cmtst_op[size]);
10359             return;
10360         }
10361         /* else CMEQ */
10362         cond = TCG_COND_EQ;
10363         goto do_gvec_cmp;
10364     case 0x06: /* CMGT, CMHI */
10365         cond = u ? TCG_COND_GTU : TCG_COND_GT;
10366         goto do_gvec_cmp;
10367     case 0x07: /* CMGE, CMHS */
10368         cond = u ? TCG_COND_GEU : TCG_COND_GE;
          /* shared tail for all three compares; cond was set by the case */
10369     do_gvec_cmp:
10370         tcg_gen_gvec_cmp(cond, size, vec_full_reg_offset(s, rd),
10371                          vec_full_reg_offset(s, rn),
10372                          vec_full_reg_offset(s, rm),
10373                          is_q ? 16 : 8, vec_full_reg_size(s));
10374         return;
10375     }
10376
      /* Step 4: per-element / per-chunk expansion */
10377     if (size == 3) {
          /* step 1 rejected every size == 3 encoding that lacks Q */
10378         assert(is_q);
10379         for (pass = 0; pass < 2; pass++) {
10380             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
10381             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
10382             TCGv_i64 tcg_res = tcg_temp_new_i64();
10383
10384             read_vec_element(s, tcg_op1, rn, pass, MO_64);
10385             read_vec_element(s, tcg_op2, rm, pass, MO_64);
10386
10387             handle_3same_64(s, opcode, u, tcg_res, tcg_op1, tcg_op2);
10388
10389             write_vec_element(s, tcg_res, rd, pass, MO_64);
10390
10391             tcg_temp_free_i64(tcg_res);
10392             tcg_temp_free_i64(tcg_op1);
10393             tcg_temp_free_i64(tcg_op2);
10394         }
10395     } else {
          /* one pass per 32-bit chunk of the 64- or 128-bit vector */
10396         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
10397             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10398             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10399             TCGv_i32 tcg_res = tcg_temp_new_i32();
              /* each case below sets exactly one of these two */
10400             NeonGenTwoOpFn *genfn = NULL;
10401             NeonGenTwoOpEnvFn *genenvfn = NULL;
10402
10403             read_vec_element_i32(s, tcg_op1, rn, pass, MO_32);
10404             read_vec_element_i32(s, tcg_op2, rm, pass, MO_32);
10405
              /* the fns[][] tables are indexed [size][u]: signed entry
               * first, unsigned second
               */
10406             switch (opcode) {
10407             case 0x0: /* SHADD, UHADD */
10408             {
10409                 static NeonGenTwoOpFn * const fns[3][2] = {
10410                     { gen_helper_neon_hadd_s8, gen_helper_neon_hadd_u8 },
10411                     { gen_helper_neon_hadd_s16, gen_helper_neon_hadd_u16 },
10412                     { gen_helper_neon_hadd_s32, gen_helper_neon_hadd_u32 },
10413                 };
10414                 genfn = fns[size][u];
10415                 break;
10416             }
10417             case 0x1: /* SQADD, UQADD */
10418             {
10419                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10420                     { gen_helper_neon_qadd_s8, gen_helper_neon_qadd_u8 },
10421                     { gen_helper_neon_qadd_s16, gen_helper_neon_qadd_u16 },
10422                     { gen_helper_neon_qadd_s32, gen_helper_neon_qadd_u32 },
10423                 };
10424                 genenvfn = fns[size][u];
10425                 break;
10426             }
10427             case 0x2: /* SRHADD, URHADD */
10428             {
10429                 static NeonGenTwoOpFn * const fns[3][2] = {
10430                     { gen_helper_neon_rhadd_s8, gen_helper_neon_rhadd_u8 },
10431                     { gen_helper_neon_rhadd_s16, gen_helper_neon_rhadd_u16 },
10432                     { gen_helper_neon_rhadd_s32, gen_helper_neon_rhadd_u32 },
10433                 };
10434                 genfn = fns[size][u];
10435                 break;
10436             }
10437             case 0x4: /* SHSUB, UHSUB */
10438             {
10439                 static NeonGenTwoOpFn * const fns[3][2] = {
10440                     { gen_helper_neon_hsub_s8, gen_helper_neon_hsub_u8 },
10441                     { gen_helper_neon_hsub_s16, gen_helper_neon_hsub_u16 },
10442                     { gen_helper_neon_hsub_s32, gen_helper_neon_hsub_u32 },
10443                 };
10444                 genfn = fns[size][u];
10445                 break;
10446             }
10447             case 0x5: /* SQSUB, UQSUB */
10448             {
10449                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10450                     { gen_helper_neon_qsub_s8, gen_helper_neon_qsub_u8 },
10451                     { gen_helper_neon_qsub_s16, gen_helper_neon_qsub_u16 },
10452                     { gen_helper_neon_qsub_s32, gen_helper_neon_qsub_u32 },
10453                 };
10454                 genenvfn = fns[size][u];
10455                 break;
10456             }
10457             case 0x8: /* SSHL, USHL */
10458             {
10459                 static NeonGenTwoOpFn * const fns[3][2] = {
10460                     { gen_helper_neon_shl_s8, gen_helper_neon_shl_u8 },
10461                     { gen_helper_neon_shl_s16, gen_helper_neon_shl_u16 },
10462                     { gen_helper_neon_shl_s32, gen_helper_neon_shl_u32 },
10463                 };
10464                 genfn = fns[size][u];
10465                 break;
10466             }
10467             case 0x9: /* SQSHL, UQSHL */
10468             {
10469                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10470                     { gen_helper_neon_qshl_s8, gen_helper_neon_qshl_u8 },
10471                     { gen_helper_neon_qshl_s16, gen_helper_neon_qshl_u16 },
10472                     { gen_helper_neon_qshl_s32, gen_helper_neon_qshl_u32 },
10473                 };
10474                 genenvfn = fns[size][u];
10475                 break;
10476             }
10477             case 0xa: /* SRSHL, URSHL */
10478             {
10479                 static NeonGenTwoOpFn * const fns[3][2] = {
10480                     { gen_helper_neon_rshl_s8, gen_helper_neon_rshl_u8 },
10481                     { gen_helper_neon_rshl_s16, gen_helper_neon_rshl_u16 },
10482                     { gen_helper_neon_rshl_s32, gen_helper_neon_rshl_u32 },
10483                 };
10484                 genfn = fns[size][u];
10485                 break;
10486             }
10487             case 0xb: /* SQRSHL, UQRSHL */
10488             {
10489                 static NeonGenTwoOpEnvFn * const fns[3][2] = {
10490                     { gen_helper_neon_qrshl_s8, gen_helper_neon_qrshl_u8 },
10491                     { gen_helper_neon_qrshl_s16, gen_helper_neon_qrshl_u16 },
10492                     { gen_helper_neon_qrshl_s32, gen_helper_neon_qrshl_u32 },
10493                 };
10494                 genenvfn = fns[size][u];
10495                 break;
10496             }
10497             case 0xc: /* SMAX, UMAX */
10498             {
10499                 static NeonGenTwoOpFn * const fns[3][2] = {
10500                     { gen_helper_neon_max_s8, gen_helper_neon_max_u8 },
10501                     { gen_helper_neon_max_s16, gen_helper_neon_max_u16 },
10502                     { gen_max_s32, gen_max_u32 },
10503                 };
10504                 genfn = fns[size][u];
10505                 break;
10506             }
10507
10508             case 0xd: /* SMIN, UMIN */
10509             {
10510                 static NeonGenTwoOpFn * const fns[3][2] = {
10511                     { gen_helper_neon_min_s8, gen_helper_neon_min_u8 },
10512                     { gen_helper_neon_min_s16, gen_helper_neon_min_u16 },
10513                     { gen_min_s32, gen_min_u32 },
10514                 };
10515                 genfn = fns[size][u];
10516                 break;
10517             }
10518             case 0xe: /* SABD, UABD */
10519             case 0xf: /* SABA, UABA */
10520             {
                  /* SABA/UABA take the same absolute difference here;
                   * the accumulate into Rd is added after the call below
                   */
10521                 static NeonGenTwoOpFn * const fns[3][2] = {
10522                     { gen_helper_neon_abd_s8, gen_helper_neon_abd_u8 },
10523                     { gen_helper_neon_abd_s16, gen_helper_neon_abd_u16 },
10524                     { gen_helper_neon_abd_s32, gen_helper_neon_abd_u32 },
10525                 };
10526                 genfn = fns[size][u];
10527                 break;
10528             }
10529             case 0x13: /* MUL, PMUL */
                  /* MUL already returned via gvec; only byte PMUL is left */
10530                 assert(u); /* PMUL */
10531                 assert(size == 0);
10532                 genfn = gen_helper_neon_mul_p8;
10533                 break;
10534             case 0x16: /* SQDMULH, SQRDMULH */
10535             {
                  /* indexed [size - 1][u]; size 0 and 3 were rejected above */
10536                 static NeonGenTwoOpEnvFn * const fns[2][2] = {
10537                     { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
10538                     { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
10539                 };
10540                 assert(size == 1 || size == 2);
10541                 genenvfn = fns[size - 1][u];
10542                 break;
10543             }
10544             default:
10545                 g_assert_not_reached();
10546             }
10547
              /* the Env variants additionally receive cpu_env */
10548             if (genenvfn) {
10549                 genenvfn(tcg_res, cpu_env, tcg_op1, tcg_op2);
10550             } else {
10551                 genfn(tcg_res, tcg_op1, tcg_op2);
10552             }
10553
10554             if (opcode == 0xf) {
10555                 /* SABA, UABA: accumulating ops */
10556                 static NeonGenTwoOpFn * const fns[3] = {
10557                     gen_helper_neon_add_u8,
10558                     gen_helper_neon_add_u16,
10559                     tcg_gen_add_i32,
10560                 };
10561
                  /* tcg_op1 is reused to hold the current Rd chunk */
10562                 read_vec_element_i32(s, tcg_op1, rd, pass, MO_32);
10563                 fns[size](tcg_res, tcg_op1, tcg_res);
10564             }
10565
10566             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
10567
10568             tcg_temp_free_i32(tcg_res);
10569             tcg_temp_free_i32(tcg_op1);
10570             tcg_temp_free_i32(tcg_op2);
10571         }
10572     }
10573     clear_vec_high(s, is_q, rd);
10574 }
10575
10576 /* AdvSIMD three same
10577  *  31  30  29  28       24 23  22  21 20  16 15    11  10 9    5 4    0
10578  * +---+---+---+-----------+------+---+------+--------+---+------+------+
10579  * | 0 | Q | U | 0 1 1 1 0 | size | 1 |  Rm  | opcode | 1 |  Rn  |  Rd  |
10580  * +---+---+---+-----------+------+---+------+--------+---+------+------+
10581  */
10582 static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
10583 {
10584     int opcode = extract32(insn, 11, 5);
10585
10586     switch (opcode) {
10587     case 0x3: /* logic ops */
10588         disas_simd_3same_logic(s, insn);
10589         break;
10590     case 0x17: /* ADDP */
10591     case 0x14: /* SMAXP, UMAXP */
10592     case 0x15: /* SMINP, UMINP */
10593     {
10594         /* Pairwise operations */
10595         int is_q = extract32(insn, 30, 1);
10596         int u = extract32(insn, 29, 1);
10597         int size = extract32(insn, 22, 2);
10598         int rm = extract32(insn, 16, 5);
10599         int rn = extract32(insn, 5, 5);
10600         int rd = extract32(insn, 0, 5);
10601         if (opcode == 0x17) {
10602             if (u || (size == 3 && !is_q)) {
10603                 unallocated_encoding(s);
10604                 return;
10605             }
10606         } else {
10607             if (size == 3) {
10608                 unallocated_encoding(s);
10609                 return;
10610             }
10611         }
10612         handle_simd_3same_pair(s, is_q, u, opcode, size, rn, rm, rd);
10613         break;
10614     }
10615     case 0x18 ... 0x31:
10616         /* floating point ops, sz[1] and U are part of opcode */
10617         disas_simd_3same_float(s, insn);
10618         break;
10619     default:
10620         disas_simd_3same_int(s, insn);
10621         break;
10622     }
10623 }
10624
10625 /*
10626  * Advanced SIMD three same (ARMv8.2 FP16 variants)
10627  *
10628  *  31  30  29  28       24 23  22 21 20  16 15 14 13    11 10  9    5 4    0
10629  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
10630  * | 0 | Q | U | 0 1 1 1 0 | a | 1 0 |  Rm  | 0 0 | opcode | 1 |  Rn  |  Rd  |
10631  * +---+---+---+-----------+---------+------+-----+--------+---+------+------+
10632  *
10633  * This includes FMULX, FCMEQ (register), FRECPS, FRSQRTS, FCMGE
10634  * (register), FACGE, FABD, FCMGT (register) and FACGT.
10635  *
10636  */
10637 static void disas_simd_three_reg_same_fp16(DisasContext *s, uint32_t insn)
10638 {
10639     int opcode, fpopcode;
10640     int is_q, u, a, rm, rn, rd;
10641     int datasize, elements;
10642     int pass;
10643     TCGv_ptr fpst;
10644     bool pairwise = false;
10645
10646     if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
10647         unallocated_encoding(s);
10648         return;
10649     }
10650
10651     if (!fp_access_check(s)) {
10652         return;
10653     }
10654
10655     /* For these floating point ops, the U, a and opcode bits
10656      * together indicate the operation.
10657      */
10658     opcode = extract32(insn, 11, 3);
10659     u = extract32(insn, 29, 1);
10660     a = extract32(insn, 23, 1);
10661     is_q = extract32(insn, 30, 1);
10662     rm = extract32(insn, 16, 5);
10663     rn = extract32(insn, 5, 5);
10664     rd = extract32(insn, 0, 5);
10665
10666     fpopcode = opcode | (a << 3) |  (u << 4);
10667     datasize = is_q ? 128 : 64;
10668     elements = datasize / 16;
10669
10670     switch (fpopcode) {
10671     case 0x10: /* FMAXNMP */
10672     case 0x12: /* FADDP */
10673     case 0x16: /* FMAXP */
10674     case 0x18: /* FMINNMP */
10675     case 0x1e: /* FMINP */
10676         pairwise = true;
10677         break;
10678     }
10679
10680     fpst = get_fpstatus_ptr(true);
10681
10682     if (pairwise) {
10683         int maxpass = is_q ? 8 : 4;
10684         TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10685         TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10686         TCGv_i32 tcg_res[8];
10687
10688         for (pass = 0; pass < maxpass; pass++) {
10689             int passreg = pass < (maxpass / 2) ? rn : rm;
10690             int passelt = (pass << 1) & (maxpass - 1);
10691
10692             read_vec_element_i32(s, tcg_op1, passreg, passelt, MO_16);
10693             read_vec_element_i32(s, tcg_op2, passreg, passelt + 1, MO_16);
10694             tcg_res[pass] = tcg_temp_new_i32();
10695
10696             switch (fpopcode) {
10697             case 0x10: /* FMAXNMP */
10698                 gen_helper_advsimd_maxnumh(tcg_res[pass], tcg_op1, tcg_op2,
10699                                            fpst);
10700                 break;
10701             case 0x12: /* FADDP */
10702                 gen_helper_advsimd_addh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10703                 break;
10704             case 0x16: /* FMAXP */
10705                 gen_helper_advsimd_maxh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10706                 break;
10707             case 0x18: /* FMINNMP */
10708                 gen_helper_advsimd_minnumh(tcg_res[pass], tcg_op1, tcg_op2,
10709                                            fpst);
10710                 break;
10711             case 0x1e: /* FMINP */
10712                 gen_helper_advsimd_minh(tcg_res[pass], tcg_op1, tcg_op2, fpst);
10713                 break;
10714             default:
10715                 g_assert_not_reached();
10716             }
10717         }
10718
10719         for (pass = 0; pass < maxpass; pass++) {
10720             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_16);
10721             tcg_temp_free_i32(tcg_res[pass]);
10722         }
10723
10724         tcg_temp_free_i32(tcg_op1);
10725         tcg_temp_free_i32(tcg_op2);
10726
10727     } else {
10728         for (pass = 0; pass < elements; pass++) {
10729             TCGv_i32 tcg_op1 = tcg_temp_new_i32();
10730             TCGv_i32 tcg_op2 = tcg_temp_new_i32();
10731             TCGv_i32 tcg_res = tcg_temp_new_i32();
10732
10733             read_vec_element_i32(s, tcg_op1, rn, pass, MO_16);
10734             read_vec_element_i32(s, tcg_op2, rm, pass, MO_16);
10735
10736             switch (fpopcode) {
10737             case 0x0: /* FMAXNM */
10738                 gen_helper_advsimd_maxnumh(tcg_res, tcg_op1, tcg_op2, fpst);
10739                 break;
10740             case 0x1: /* FMLA */
10741                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
10742                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
10743                                            fpst);
10744                 break;
10745             case 0x2: /* FADD */
10746                 gen_helper_advsimd_addh(tcg_res, tcg_op1, tcg_op2, fpst);
10747                 break;
10748             case 0x3: /* FMULX */
10749                 gen_helper_advsimd_mulxh(tcg_res, tcg_op1, tcg_op2, fpst);
10750                 break;
10751             case 0x4: /* FCMEQ */
10752                 gen_helper_advsimd_ceq_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10753                 break;
10754             case 0x6: /* FMAX */
10755                 gen_helper_advsimd_maxh(tcg_res, tcg_op1, tcg_op2, fpst);
10756                 break;
10757             case 0x7: /* FRECPS */
10758                 gen_helper_recpsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10759                 break;
10760             case 0x8: /* FMINNM */
10761                 gen_helper_advsimd_minnumh(tcg_res, tcg_op1, tcg_op2, fpst);
10762                 break;
10763             case 0x9: /* FMLS */
10764                 /* As usual for ARM, separate negation for fused multiply-add */
10765                 tcg_gen_xori_i32(tcg_op1, tcg_op1, 0x8000);
10766                 read_vec_element_i32(s, tcg_res, rd, pass, MO_16);
10767                 gen_helper_advsimd_muladdh(tcg_res, tcg_op1, tcg_op2, tcg_res,
10768                                            fpst);
10769                 break;
10770             case 0xa: /* FSUB */
10771                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
10772                 break;
10773             case 0xe: /* FMIN */
10774                 gen_helper_advsimd_minh(tcg_res, tcg_op1, tcg_op2, fpst);
10775                 break;
10776             case 0xf: /* FRSQRTS */
10777                 gen_helper_rsqrtsf_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10778                 break;
10779             case 0x13: /* FMUL */
10780                 gen_helper_advsimd_mulh(tcg_res, tcg_op1, tcg_op2, fpst);
10781                 break;
10782             case 0x14: /* FCMGE */
10783                 gen_helper_advsimd_cge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10784                 break;
10785             case 0x15: /* FACGE */
10786                 gen_helper_advsimd_acge_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10787                 break;
10788             case 0x17: /* FDIV */
10789                 gen_helper_advsimd_divh(tcg_res, tcg_op1, tcg_op2, fpst);
10790                 break;
10791             case 0x1a: /* FABD */
10792                 gen_helper_advsimd_subh(tcg_res, tcg_op1, tcg_op2, fpst);
10793                 tcg_gen_andi_i32(tcg_res, tcg_res, 0x7fff);
10794                 break;
10795             case 0x1c: /* FCMGT */
10796                 gen_helper_advsimd_cgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10797                 break;
10798             case 0x1d: /* FACGT */
10799                 gen_helper_advsimd_acgt_f16(tcg_res, tcg_op1, tcg_op2, fpst);
10800                 break;
10801             default:
10802                 fprintf(stderr, "%s: insn %#04x, fpop %#2x @ %#" PRIx64 "\n",
10803                         __func__, insn, fpopcode, s->pc);
10804                 g_assert_not_reached();
10805             }
10806
10807             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
10808             tcg_temp_free_i32(tcg_res);
10809             tcg_temp_free_i32(tcg_op1);
10810             tcg_temp_free_i32(tcg_op2);
10811         }
10812     }
10813
10814     tcg_temp_free_ptr(fpst);
10815
10816     clear_vec_high(s, is_q, rd);
10817 }
10818
10819 /* AdvSIMD three same extra
10820  *  31   30  29 28       24 23  22  21 20  16  15 14    11  10 9  5 4  0
10821  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
10822  * | 0 | Q | U | 0 1 1 1 0 | size | 0 |  Rm  | 1 | opcode | 1 | Rn | Rd |
10823  * +---+---+---+-----------+------+---+------+---+--------+---+----+----+
10824  */
10825 static void disas_simd_three_reg_same_extra(DisasContext *s, uint32_t insn)
10826 {
10827     int rd = extract32(insn, 0, 5);
10828     int rn = extract32(insn, 5, 5);
10829     int opcode = extract32(insn, 11, 4);
10830     int rm = extract32(insn, 16, 5);
10831     int size = extract32(insn, 22, 2);
10832     bool u = extract32(insn, 29, 1);
10833     bool is_q = extract32(insn, 30, 1);
10834     int feature, rot;
10835
10836     switch (u * 16 + opcode) {
10837     case 0x10: /* SQRDMLAH (vector) */
10838     case 0x11: /* SQRDMLSH (vector) */
10839         if (size != 1 && size != 2) {
10840             unallocated_encoding(s);
10841             return;
10842         }
10843         feature = ARM_FEATURE_V8_RDM;
10844         break;
10845     case 0xc: /* FCADD, #90 */
10846     case 0xe: /* FCADD, #270 */
10847         if (size == 0
10848             || (size == 1 && !arm_dc_feature(s, ARM_FEATURE_V8_FP16))
10849             || (size == 3 && !is_q)) {
10850             unallocated_encoding(s);
10851             return;
10852         }
10853         feature = ARM_FEATURE_V8_FCMA;
10854         break;
10855     default:
10856         unallocated_encoding(s);
10857         return;
10858     }
10859     if (!arm_dc_feature(s, feature)) {
10860         unallocated_encoding(s);
10861         return;
10862     }
10863     if (!fp_access_check(s)) {
10864         return;
10865     }
10866
10867     switch (opcode) {
10868     case 0x0: /* SQRDMLAH (vector) */
10869         switch (size) {
10870         case 1:
10871             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s16);
10872             break;
10873         case 2:
10874             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlah_s32);
10875             break;
10876         default:
10877             g_assert_not_reached();
10878         }
10879         return;
10880
10881     case 0x1: /* SQRDMLSH (vector) */
10882         switch (size) {
10883         case 1:
10884             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s16);
10885             break;
10886         case 2:
10887             gen_gvec_op3_env(s, is_q, rd, rn, rm, gen_helper_gvec_qrdmlsh_s32);
10888             break;
10889         default:
10890             g_assert_not_reached();
10891         }
10892         return;
10893
10894     case 0xc: /* FCADD, #90 */
10895     case 0xe: /* FCADD, #270 */
10896         rot = extract32(opcode, 1, 1);
10897         switch (size) {
10898         case 1:
10899             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
10900                               gen_helper_gvec_fcaddh);
10901             break;
10902         case 2:
10903             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
10904                               gen_helper_gvec_fcadds);
10905             break;
10906         case 3:
10907             gen_gvec_op3_fpst(s, is_q, rd, rn, rm, size == 1, rot,
10908                               gen_helper_gvec_fcaddd);
10909             break;
10910         default:
10911             g_assert_not_reached();
10912         }
10913         return;
10914
10915     default:
10916         g_assert_not_reached();
10917     }
10918 }
10919
10920 static void handle_2misc_widening(DisasContext *s, int opcode, bool is_q,
10921                                   int size, int rn, int rd)
10922 {
10923     /* Handle 2-reg-misc ops which are widening (so each size element
10924      * in the source becomes a 2*size element in the destination.
10925      * The only instruction like this is FCVTL.
10926      */
10927     int pass;
10928
10929     if (size == 3) {
10930         /* 32 -> 64 bit fp conversion */
10931         TCGv_i64 tcg_res[2];
10932         int srcelt = is_q ? 2 : 0;
10933
10934         for (pass = 0; pass < 2; pass++) {
10935             TCGv_i32 tcg_op = tcg_temp_new_i32();
10936             tcg_res[pass] = tcg_temp_new_i64();
10937
10938             read_vec_element_i32(s, tcg_op, rn, srcelt + pass, MO_32);
10939             gen_helper_vfp_fcvtds(tcg_res[pass], tcg_op, cpu_env);
10940             tcg_temp_free_i32(tcg_op);
10941         }
10942         for (pass = 0; pass < 2; pass++) {
10943             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
10944             tcg_temp_free_i64(tcg_res[pass]);
10945         }
10946     } else {
10947         /* 16 -> 32 bit fp conversion */
10948         int srcelt = is_q ? 4 : 0;
10949         TCGv_i32 tcg_res[4];
10950
10951         for (pass = 0; pass < 4; pass++) {
10952             tcg_res[pass] = tcg_temp_new_i32();
10953
10954             read_vec_element_i32(s, tcg_res[pass], rn, srcelt + pass, MO_16);
10955             gen_helper_vfp_fcvt_f16_to_f32(tcg_res[pass], tcg_res[pass],
10956                                            cpu_env);
10957         }
10958         for (pass = 0; pass < 4; pass++) {
10959             write_vec_element_i32(s, tcg_res[pass], rd, pass, MO_32);
10960             tcg_temp_free_i32(tcg_res[pass]);
10961         }
10962     }
10963 }
10964
10965 static void handle_rev(DisasContext *s, int opcode, bool u,
10966                        bool is_q, int size, int rn, int rd)
10967 {
10968     int op = (opcode << 1) | u;
10969     int opsz = op + size;
10970     int grp_size = 3 - opsz;
10971     int dsize = is_q ? 128 : 64;
10972     int i;
10973
10974     if (opsz >= 3) {
10975         unallocated_encoding(s);
10976         return;
10977     }
10978
10979     if (!fp_access_check(s)) {
10980         return;
10981     }
10982
10983     if (size == 0) {
10984         /* Special case bytes, use bswap op on each group of elements */
10985         int groups = dsize / (8 << grp_size);
10986
10987         for (i = 0; i < groups; i++) {
10988             TCGv_i64 tcg_tmp = tcg_temp_new_i64();
10989
10990             read_vec_element(s, tcg_tmp, rn, i, grp_size);
10991             switch (grp_size) {
10992             case MO_16:
10993                 tcg_gen_bswap16_i64(tcg_tmp, tcg_tmp);
10994                 break;
10995             case MO_32:
10996                 tcg_gen_bswap32_i64(tcg_tmp, tcg_tmp);
10997                 break;
10998             case MO_64:
10999                 tcg_gen_bswap64_i64(tcg_tmp, tcg_tmp);
11000                 break;
11001             default:
11002                 g_assert_not_reached();
11003             }
11004             write_vec_element(s, tcg_tmp, rd, i, grp_size);
11005             tcg_temp_free_i64(tcg_tmp);
11006         }
11007         clear_vec_high(s, is_q, rd);
11008     } else {
11009         int revmask = (1 << grp_size) - 1;
11010         int esize = 8 << size;
11011         int elements = dsize / esize;
11012         TCGv_i64 tcg_rn = tcg_temp_new_i64();
11013         TCGv_i64 tcg_rd = tcg_const_i64(0);
11014         TCGv_i64 tcg_rd_hi = tcg_const_i64(0);
11015
11016         for (i = 0; i < elements; i++) {
11017             int e_rev = (i & 0xf) ^ revmask;
11018             int off = e_rev * esize;
11019             read_vec_element(s, tcg_rn, rn, i, size);
11020             if (off >= 64) {
11021                 tcg_gen_deposit_i64(tcg_rd_hi, tcg_rd_hi,
11022                                     tcg_rn, off - 64, esize);
11023             } else {
11024                 tcg_gen_deposit_i64(tcg_rd, tcg_rd, tcg_rn, off, esize);
11025             }
11026         }
11027         write_vec_element(s, tcg_rd, rd, 0, MO_64);
11028         write_vec_element(s, tcg_rd_hi, rd, 1, MO_64);
11029
11030         tcg_temp_free_i64(tcg_rd_hi);
11031         tcg_temp_free_i64(tcg_rd);
11032         tcg_temp_free_i64(tcg_rn);
11033     }
11034 }
11035
11036 static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
11037                                   bool is_q, int size, int rn, int rd)
11038 {
11039     /* Implement the pairwise operations from 2-misc:
11040      * SADDLP, UADDLP, SADALP, UADALP.
11041      * These all add pairs of elements in the input to produce a
11042      * double-width result element in the output (possibly accumulating).
11043      */
11044     bool accum = (opcode == 0x6);
11045     int maxpass = is_q ? 2 : 1;
11046     int pass;
11047     TCGv_i64 tcg_res[2];
11048
11049     if (size == 2) {
11050         /* 32 + 32 -> 64 op */
11051         TCGMemOp memop = size + (u ? 0 : MO_SIGN);
11052
11053         for (pass = 0; pass < maxpass; pass++) {
11054             TCGv_i64 tcg_op1 = tcg_temp_new_i64();
11055             TCGv_i64 tcg_op2 = tcg_temp_new_i64();
11056
11057             tcg_res[pass] = tcg_temp_new_i64();
11058
11059             read_vec_element(s, tcg_op1, rn, pass * 2, memop);
11060             read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
11061             tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
11062             if (accum) {
11063                 read_vec_element(s, tcg_op1, rd, pass, MO_64);
11064                 tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
11065             }
11066
11067             tcg_temp_free_i64(tcg_op1);
11068             tcg_temp_free_i64(tcg_op2);
11069         }
11070     } else {
11071         for (pass = 0; pass < maxpass; pass++) {
11072             TCGv_i64 tcg_op = tcg_temp_new_i64();
11073             NeonGenOneOpFn *genfn;
11074             static NeonGenOneOpFn * const fns[2][2] = {
11075                 { gen_helper_neon_addlp_s8,  gen_helper_neon_addlp_u8 },
11076                 { gen_helper_neon_addlp_s16,  gen_helper_neon_addlp_u16 },
11077             };
11078
11079             genfn = fns[size][u];
11080
11081             tcg_res[pass] = tcg_temp_new_i64();
11082
11083             read_vec_element(s, tcg_op, rn, pass, MO_64);
11084             genfn(tcg_res[pass], tcg_op);
11085
11086             if (accum) {
11087                 read_vec_element(s, tcg_op, rd, pass, MO_64);
11088                 if (size == 0) {
11089                     gen_helper_neon_addl_u16(tcg_res[pass],
11090                                              tcg_res[pass], tcg_op);
11091                 } else {
11092                     gen_helper_neon_addl_u32(tcg_res[pass],
11093                                              tcg_res[pass], tcg_op);
11094                 }
11095             }
11096             tcg_temp_free_i64(tcg_op);
11097         }
11098     }
11099     if (!is_q) {
11100         tcg_res[1] = tcg_const_i64(0);
11101     }
11102     for (pass = 0; pass < 2; pass++) {
11103         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11104         tcg_temp_free_i64(tcg_res[pass]);
11105     }
11106 }
11107
11108 static void handle_shll(DisasContext *s, bool is_q, int size, int rn, int rd)
11109 {
11110     /* Implement SHLL and SHLL2 */
11111     int pass;
11112     int part = is_q ? 2 : 0;
11113     TCGv_i64 tcg_res[2];
11114
11115     for (pass = 0; pass < 2; pass++) {
11116         static NeonGenWidenFn * const widenfns[3] = {
11117             gen_helper_neon_widen_u8,
11118             gen_helper_neon_widen_u16,
11119             tcg_gen_extu_i32_i64,
11120         };
11121         NeonGenWidenFn *widenfn = widenfns[size];
11122         TCGv_i32 tcg_op = tcg_temp_new_i32();
11123
11124         read_vec_element_i32(s, tcg_op, rn, part + pass, MO_32);
11125         tcg_res[pass] = tcg_temp_new_i64();
11126         widenfn(tcg_res[pass], tcg_op);
11127         tcg_gen_shli_i64(tcg_res[pass], tcg_res[pass], 8 << size);
11128
11129         tcg_temp_free_i32(tcg_op);
11130     }
11131
11132     for (pass = 0; pass < 2; pass++) {
11133         write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
11134         tcg_temp_free_i64(tcg_res[pass]);
11135     }
11136 }
11137
11138 /* AdvSIMD two reg misc
11139  *   31  30  29 28       24 23  22 21       17 16    12 11 10 9    5 4    0
11140  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11141  * | 0 | Q | U | 0 1 1 1 0 | size | 1 0 0 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11142  * +---+---+---+-----------+------+-----------+--------+-----+------+------+
11143  */
11144 static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
11145 {
11146     int size = extract32(insn, 22, 2);
11147     int opcode = extract32(insn, 12, 5);
11148     bool u = extract32(insn, 29, 1);
11149     bool is_q = extract32(insn, 30, 1);
11150     int rn = extract32(insn, 5, 5);
11151     int rd = extract32(insn, 0, 5);
11152     bool need_fpstatus = false;
11153     bool need_rmode = false;
11154     int rmode = -1;
11155     TCGv_i32 tcg_rmode;
11156     TCGv_ptr tcg_fpstatus;
11157
11158     switch (opcode) {
11159     case 0x0: /* REV64, REV32 */
11160     case 0x1: /* REV16 */
11161         handle_rev(s, opcode, u, is_q, size, rn, rd);
11162         return;
11163     case 0x5: /* CNT, NOT, RBIT */
11164         if (u && size == 0) {
11165             /* NOT */
11166             break;
11167         } else if (u && size == 1) {
11168             /* RBIT */
11169             break;
11170         } else if (!u && size == 0) {
11171             /* CNT */
11172             break;
11173         }
11174         unallocated_encoding(s);
11175         return;
11176     case 0x12: /* XTN, XTN2, SQXTUN, SQXTUN2 */
11177     case 0x14: /* SQXTN, SQXTN2, UQXTN, UQXTN2 */
11178         if (size == 3) {
11179             unallocated_encoding(s);
11180             return;
11181         }
11182         if (!fp_access_check(s)) {
11183             return;
11184         }
11185
11186         handle_2misc_narrow(s, false, opcode, u, is_q, size, rn, rd);
11187         return;
11188     case 0x4: /* CLS, CLZ */
11189         if (size == 3) {
11190             unallocated_encoding(s);
11191             return;
11192         }
11193         break;
11194     case 0x2: /* SADDLP, UADDLP */
11195     case 0x6: /* SADALP, UADALP */
11196         if (size == 3) {
11197             unallocated_encoding(s);
11198             return;
11199         }
11200         if (!fp_access_check(s)) {
11201             return;
11202         }
11203         handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
11204         return;
11205     case 0x13: /* SHLL, SHLL2 */
11206         if (u == 0 || size == 3) {
11207             unallocated_encoding(s);
11208             return;
11209         }
11210         if (!fp_access_check(s)) {
11211             return;
11212         }
11213         handle_shll(s, is_q, size, rn, rd);
11214         return;
11215     case 0xa: /* CMLT */
11216         if (u == 1) {
11217             unallocated_encoding(s);
11218             return;
11219         }
11220         /* fall through */
11221     case 0x8: /* CMGT, CMGE */
11222     case 0x9: /* CMEQ, CMLE */
11223     case 0xb: /* ABS, NEG */
11224         if (size == 3 && !is_q) {
11225             unallocated_encoding(s);
11226             return;
11227         }
11228         break;
11229     case 0x3: /* SUQADD, USQADD */
11230         if (size == 3 && !is_q) {
11231             unallocated_encoding(s);
11232             return;
11233         }
11234         if (!fp_access_check(s)) {
11235             return;
11236         }
11237         handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
11238         return;
11239     case 0x7: /* SQABS, SQNEG */
11240         if (size == 3 && !is_q) {
11241             unallocated_encoding(s);
11242             return;
11243         }
11244         break;
11245     case 0xc ... 0xf:
11246     case 0x16 ... 0x1d:
11247     case 0x1f:
11248     {
11249         /* Floating point: U, size[1] and opcode indicate operation;
11250          * size[0] indicates single or double precision.
11251          */
11252         int is_double = extract32(size, 0, 1);
11253         opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
11254         size = is_double ? 3 : 2;
11255         switch (opcode) {
11256         case 0x2f: /* FABS */
11257         case 0x6f: /* FNEG */
11258             if (size == 3 && !is_q) {
11259                 unallocated_encoding(s);
11260                 return;
11261             }
11262             break;
11263         case 0x1d: /* SCVTF */
11264         case 0x5d: /* UCVTF */
11265         {
11266             bool is_signed = (opcode == 0x1d) ? true : false;
11267             int elements = is_double ? 2 : is_q ? 4 : 2;
11268             if (is_double && !is_q) {
11269                 unallocated_encoding(s);
11270                 return;
11271             }
11272             if (!fp_access_check(s)) {
11273                 return;
11274             }
11275             handle_simd_intfp_conv(s, rd, rn, elements, is_signed, 0, size);
11276             return;
11277         }
11278         case 0x2c: /* FCMGT (zero) */
11279         case 0x2d: /* FCMEQ (zero) */
11280         case 0x2e: /* FCMLT (zero) */
11281         case 0x6c: /* FCMGE (zero) */
11282         case 0x6d: /* FCMLE (zero) */
11283             if (size == 3 && !is_q) {
11284                 unallocated_encoding(s);
11285                 return;
11286             }
11287             handle_2misc_fcmp_zero(s, opcode, false, u, is_q, size, rn, rd);
11288             return;
11289         case 0x7f: /* FSQRT */
11290             if (size == 3 && !is_q) {
11291                 unallocated_encoding(s);
11292                 return;
11293             }
11294             break;
11295         case 0x1a: /* FCVTNS */
11296         case 0x1b: /* FCVTMS */
11297         case 0x3a: /* FCVTPS */
11298         case 0x3b: /* FCVTZS */
11299         case 0x5a: /* FCVTNU */
11300         case 0x5b: /* FCVTMU */
11301         case 0x7a: /* FCVTPU */
11302         case 0x7b: /* FCVTZU */
11303             need_fpstatus = true;
11304             need_rmode = true;
11305             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11306             if (size == 3 && !is_q) {
11307                 unallocated_encoding(s);
11308                 return;
11309             }
11310             break;
11311         case 0x5c: /* FCVTAU */
11312         case 0x1c: /* FCVTAS */
11313             need_fpstatus = true;
11314             need_rmode = true;
11315             rmode = FPROUNDING_TIEAWAY;
11316             if (size == 3 && !is_q) {
11317                 unallocated_encoding(s);
11318                 return;
11319             }
11320             break;
11321         case 0x3c: /* URECPE */
11322             if (size == 3) {
11323                 unallocated_encoding(s);
11324                 return;
11325             }
11326             /* fall through */
11327         case 0x3d: /* FRECPE */
11328         case 0x7d: /* FRSQRTE */
11329             if (size == 3 && !is_q) {
11330                 unallocated_encoding(s);
11331                 return;
11332             }
11333             if (!fp_access_check(s)) {
11334                 return;
11335             }
11336             handle_2misc_reciprocal(s, opcode, false, u, is_q, size, rn, rd);
11337             return;
11338         case 0x56: /* FCVTXN, FCVTXN2 */
11339             if (size == 2) {
11340                 unallocated_encoding(s);
11341                 return;
11342             }
11343             /* fall through */
11344         case 0x16: /* FCVTN, FCVTN2 */
11345             /* handle_2misc_narrow does a 2*size -> size operation, but these
11346              * instructions encode the source size rather than dest size.
11347              */
11348             if (!fp_access_check(s)) {
11349                 return;
11350             }
11351             handle_2misc_narrow(s, false, opcode, 0, is_q, size - 1, rn, rd);
11352             return;
11353         case 0x17: /* FCVTL, FCVTL2 */
11354             if (!fp_access_check(s)) {
11355                 return;
11356             }
11357             handle_2misc_widening(s, opcode, is_q, size, rn, rd);
11358             return;
11359         case 0x18: /* FRINTN */
11360         case 0x19: /* FRINTM */
11361         case 0x38: /* FRINTP */
11362         case 0x39: /* FRINTZ */
11363             need_rmode = true;
11364             rmode = extract32(opcode, 5, 1) | (extract32(opcode, 0, 1) << 1);
11365             /* fall through */
11366         case 0x59: /* FRINTX */
11367         case 0x79: /* FRINTI */
11368             need_fpstatus = true;
11369             if (size == 3 && !is_q) {
11370                 unallocated_encoding(s);
11371                 return;
11372             }
11373             break;
11374         case 0x58: /* FRINTA */
11375             need_rmode = true;
11376             rmode = FPROUNDING_TIEAWAY;
11377             need_fpstatus = true;
11378             if (size == 3 && !is_q) {
11379                 unallocated_encoding(s);
11380                 return;
11381             }
11382             break;
11383         case 0x7c: /* URSQRTE */
11384             if (size == 3) {
11385                 unallocated_encoding(s);
11386                 return;
11387             }
11388             need_fpstatus = true;
11389             break;
11390         default:
11391             unallocated_encoding(s);
11392             return;
11393         }
11394         break;
11395     }
11396     default:
11397         unallocated_encoding(s);
11398         return;
11399     }
11400
11401     if (!fp_access_check(s)) {
11402         return;
11403     }
11404
11405     if (need_fpstatus || need_rmode) {
11406         tcg_fpstatus = get_fpstatus_ptr(false);
11407     } else {
11408         tcg_fpstatus = NULL;
11409     }
11410     if (need_rmode) {
11411         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
11412         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11413     } else {
11414         tcg_rmode = NULL;
11415     }
11416
11417     switch (opcode) {
11418     case 0x5:
11419         if (u && size == 0) { /* NOT */
11420             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_not, 0);
11421             return;
11422         }
11423         break;
11424     case 0xb:
11425         if (u) { /* NEG */
11426             gen_gvec_fn2(s, is_q, rd, rn, tcg_gen_gvec_neg, size);
11427             return;
11428         }
11429         break;
11430     }
11431
11432     if (size == 3) {
11433         /* All 64-bit element operations can be shared with scalar 2misc */
11434         int pass;
11435
11436         for (pass = 0; pass < (is_q ? 2 : 1); pass++) {
11437             TCGv_i64 tcg_op = tcg_temp_new_i64();
11438             TCGv_i64 tcg_res = tcg_temp_new_i64();
11439
11440             read_vec_element(s, tcg_op, rn, pass, MO_64);
11441
11442             handle_2misc_64(s, opcode, u, tcg_res, tcg_op,
11443                             tcg_rmode, tcg_fpstatus);
11444
11445             write_vec_element(s, tcg_res, rd, pass, MO_64);
11446
11447             tcg_temp_free_i64(tcg_res);
11448             tcg_temp_free_i64(tcg_op);
11449         }
11450     } else {
11451         int pass;
11452
11453         for (pass = 0; pass < (is_q ? 4 : 2); pass++) {
11454             TCGv_i32 tcg_op = tcg_temp_new_i32();
11455             TCGv_i32 tcg_res = tcg_temp_new_i32();
11456             TCGCond cond;
11457
11458             read_vec_element_i32(s, tcg_op, rn, pass, MO_32);
11459
11460             if (size == 2) {
11461                 /* Special cases for 32 bit elements */
11462                 switch (opcode) {
11463                 case 0xa: /* CMLT */
11464                     /* 32 bit integer comparison against zero, result is
11465                      * test ? (2^32 - 1) : 0. We implement via setcond(test)
11466                      * and inverting.
11467                      */
11468                     cond = TCG_COND_LT;
11469                 do_cmop:
11470                     tcg_gen_setcondi_i32(cond, tcg_res, tcg_op, 0);
11471                     tcg_gen_neg_i32(tcg_res, tcg_res);
11472                     break;
11473                 case 0x8: /* CMGT, CMGE */
11474                     cond = u ? TCG_COND_GE : TCG_COND_GT;
11475                     goto do_cmop;
11476                 case 0x9: /* CMEQ, CMLE */
11477                     cond = u ? TCG_COND_LE : TCG_COND_EQ;
11478                     goto do_cmop;
11479                 case 0x4: /* CLS */
11480                     if (u) {
11481                         tcg_gen_clzi_i32(tcg_res, tcg_op, 32);
11482                     } else {
11483                         tcg_gen_clrsb_i32(tcg_res, tcg_op);
11484                     }
11485                     break;
11486                 case 0x7: /* SQABS, SQNEG */
11487                     if (u) {
11488                         gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
11489                     } else {
11490                         gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
11491                     }
11492                     break;
11493                 case 0xb: /* ABS, NEG */
11494                     if (u) {
11495                         tcg_gen_neg_i32(tcg_res, tcg_op);
11496                     } else {
11497                         TCGv_i32 tcg_zero = tcg_const_i32(0);
11498                         tcg_gen_neg_i32(tcg_res, tcg_op);
11499                         tcg_gen_movcond_i32(TCG_COND_GT, tcg_res, tcg_op,
11500                                             tcg_zero, tcg_op, tcg_res);
11501                         tcg_temp_free_i32(tcg_zero);
11502                     }
11503                     break;
11504                 case 0x2f: /* FABS */
11505                     gen_helper_vfp_abss(tcg_res, tcg_op);
11506                     break;
11507                 case 0x6f: /* FNEG */
11508                     gen_helper_vfp_negs(tcg_res, tcg_op);
11509                     break;
11510                 case 0x7f: /* FSQRT */
11511                     gen_helper_vfp_sqrts(tcg_res, tcg_op, cpu_env);
11512                     break;
11513                 case 0x1a: /* FCVTNS */
11514                 case 0x1b: /* FCVTMS */
11515                 case 0x1c: /* FCVTAS */
11516                 case 0x3a: /* FCVTPS */
11517                 case 0x3b: /* FCVTZS */
11518                 {
11519                     TCGv_i32 tcg_shift = tcg_const_i32(0);
11520                     gen_helper_vfp_tosls(tcg_res, tcg_op,
11521                                          tcg_shift, tcg_fpstatus);
11522                     tcg_temp_free_i32(tcg_shift);
11523                     break;
11524                 }
11525                 case 0x5a: /* FCVTNU */
11526                 case 0x5b: /* FCVTMU */
11527                 case 0x5c: /* FCVTAU */
11528                 case 0x7a: /* FCVTPU */
11529                 case 0x7b: /* FCVTZU */
11530                 {
11531                     TCGv_i32 tcg_shift = tcg_const_i32(0);
11532                     gen_helper_vfp_touls(tcg_res, tcg_op,
11533                                          tcg_shift, tcg_fpstatus);
11534                     tcg_temp_free_i32(tcg_shift);
11535                     break;
11536                 }
11537                 case 0x18: /* FRINTN */
11538                 case 0x19: /* FRINTM */
11539                 case 0x38: /* FRINTP */
11540                 case 0x39: /* FRINTZ */
11541                 case 0x58: /* FRINTA */
11542                 case 0x79: /* FRINTI */
11543                     gen_helper_rints(tcg_res, tcg_op, tcg_fpstatus);
11544                     break;
11545                 case 0x59: /* FRINTX */
11546                     gen_helper_rints_exact(tcg_res, tcg_op, tcg_fpstatus);
11547                     break;
11548                 case 0x7c: /* URSQRTE */
11549                     gen_helper_rsqrte_u32(tcg_res, tcg_op, tcg_fpstatus);
11550                     break;
11551                 default:
11552                     g_assert_not_reached();
11553                 }
11554             } else {
11555                 /* Use helpers for 8 and 16 bit elements */
11556                 switch (opcode) {
11557                 case 0x5: /* CNT, RBIT */
11558                     /* For these two insns size is part of the opcode specifier
11559                      * (handled earlier); they always operate on byte elements.
11560                      */
11561                     if (u) {
11562                         gen_helper_neon_rbit_u8(tcg_res, tcg_op);
11563                     } else {
11564                         gen_helper_neon_cnt_u8(tcg_res, tcg_op);
11565                     }
11566                     break;
11567                 case 0x7: /* SQABS, SQNEG */
11568                 {
11569                     NeonGenOneOpEnvFn *genfn;
11570                     static NeonGenOneOpEnvFn * const fns[2][2] = {
11571                         { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
11572                         { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
11573                     };
11574                     genfn = fns[size][u];
11575                     genfn(tcg_res, cpu_env, tcg_op);
11576                     break;
11577                 }
11578                 case 0x8: /* CMGT, CMGE */
11579                 case 0x9: /* CMEQ, CMLE */
11580                 case 0xa: /* CMLT */
11581                 {
11582                     static NeonGenTwoOpFn * const fns[3][2] = {
11583                         { gen_helper_neon_cgt_s8, gen_helper_neon_cgt_s16 },
11584                         { gen_helper_neon_cge_s8, gen_helper_neon_cge_s16 },
11585                         { gen_helper_neon_ceq_u8, gen_helper_neon_ceq_u16 },
11586                     };
11587                     NeonGenTwoOpFn *genfn;
11588                     int comp;
11589                     bool reverse;
11590                     TCGv_i32 tcg_zero = tcg_const_i32(0);
11591
11592                     /* comp = index into [CMGT, CMGE, CMEQ, CMLE, CMLT] */
11593                     comp = (opcode - 0x8) * 2 + u;
11594                     /* ...but LE, LT are implemented as reverse GE, GT */
11595                     reverse = (comp > 2);
11596                     if (reverse) {
11597                         comp = 4 - comp;
11598                     }
11599                     genfn = fns[comp][size];
11600                     if (reverse) {
11601                         genfn(tcg_res, tcg_zero, tcg_op);
11602                     } else {
11603                         genfn(tcg_res, tcg_op, tcg_zero);
11604                     }
11605                     tcg_temp_free_i32(tcg_zero);
11606                     break;
11607                 }
11608                 case 0xb: /* ABS, NEG */
11609                     if (u) {
11610                         TCGv_i32 tcg_zero = tcg_const_i32(0);
11611                         if (size) {
11612                             gen_helper_neon_sub_u16(tcg_res, tcg_zero, tcg_op);
11613                         } else {
11614                             gen_helper_neon_sub_u8(tcg_res, tcg_zero, tcg_op);
11615                         }
11616                         tcg_temp_free_i32(tcg_zero);
11617                     } else {
11618                         if (size) {
11619                             gen_helper_neon_abs_s16(tcg_res, tcg_op);
11620                         } else {
11621                             gen_helper_neon_abs_s8(tcg_res, tcg_op);
11622                         }
11623                     }
11624                     break;
11625                 case 0x4: /* CLS, CLZ */
11626                     if (u) {
11627                         if (size == 0) {
11628                             gen_helper_neon_clz_u8(tcg_res, tcg_op);
11629                         } else {
11630                             gen_helper_neon_clz_u16(tcg_res, tcg_op);
11631                         }
11632                     } else {
11633                         if (size == 0) {
11634                             gen_helper_neon_cls_s8(tcg_res, tcg_op);
11635                         } else {
11636                             gen_helper_neon_cls_s16(tcg_res, tcg_op);
11637                         }
11638                     }
11639                     break;
11640                 default:
11641                     g_assert_not_reached();
11642                 }
11643             }
11644
11645             write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
11646
11647             tcg_temp_free_i32(tcg_res);
11648             tcg_temp_free_i32(tcg_op);
11649         }
11650     }
11651     clear_vec_high(s, is_q, rd);
11652
11653     if (need_rmode) {
11654         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11655         tcg_temp_free_i32(tcg_rmode);
11656     }
11657     if (need_fpstatus) {
11658         tcg_temp_free_ptr(tcg_fpstatus);
11659     }
11660 }
11661
11662 /* AdvSIMD [scalar] two register miscellaneous (FP16)
11663  *
11664  *   31  30  29 28  27     24  23 22 21       17 16    12 11 10 9    5 4    0
11665  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11666  * | 0 | Q | U | S | 1 1 1 0 | a | 1 1 1 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
11667  * +---+---+---+---+---------+---+-------------+--------+-----+------+------+
11668  *   mask: 1000 1111 0111 1110 0000 1100 0000 0000 0x8f7e 0c00
11669  *   val:  0000 1110 0111 1000 0000 1000 0000 0000 0x0e78 0800
11670  *
11671  * This actually covers two groups where scalar access is governed by
11672  * bit 28. A bunch of the instructions (float to integral) only exist
11673  * in the vector form and are un-allocated for the scalar decode. Also
11674  * in the scalar decode Q is always 1.
11675  */
11676 static void disas_simd_two_reg_misc_fp16(DisasContext *s, uint32_t insn)
11677 {
11678     int fpop, opcode, a, u;
11679     int rn, rd;
11680     bool is_q;
11681     bool is_scalar;
11682     bool only_in_vector = false;
11683
11684     int pass;
11685     TCGv_i32 tcg_rmode = NULL;
11686     TCGv_ptr tcg_fpstatus = NULL;
11687     bool need_rmode = false;
11688     bool need_fpst = true;
11689     int rmode;
11690
11691     if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
11692         unallocated_encoding(s);
11693         return;
11694     }
11695
11696     rd = extract32(insn, 0, 5);
11697     rn = extract32(insn, 5, 5);
11698
11699     a = extract32(insn, 23, 1);
11700     u = extract32(insn, 29, 1);
11701     is_scalar = extract32(insn, 28, 1);
11702     is_q = extract32(insn, 30, 1);
11703
11704     opcode = extract32(insn, 12, 5);
11705     fpop = deposit32(opcode, 5, 1, a);
11706     fpop = deposit32(fpop, 6, 1, u);
11707
11708     rd = extract32(insn, 0, 5);
11709     rn = extract32(insn, 5, 5);
11710
11711     switch (fpop) {
11712     case 0x1d: /* SCVTF */
11713     case 0x5d: /* UCVTF */
11714     {
11715         int elements;
11716
11717         if (is_scalar) {
11718             elements = 1;
11719         } else {
11720             elements = (is_q ? 8 : 4);
11721         }
11722
11723         if (!fp_access_check(s)) {
11724             return;
11725         }
11726         handle_simd_intfp_conv(s, rd, rn, elements, !u, 0, MO_16);
11727         return;
11728     }
11729     break;
11730     case 0x2c: /* FCMGT (zero) */
11731     case 0x2d: /* FCMEQ (zero) */
11732     case 0x2e: /* FCMLT (zero) */
11733     case 0x6c: /* FCMGE (zero) */
11734     case 0x6d: /* FCMLE (zero) */
11735         handle_2misc_fcmp_zero(s, fpop, is_scalar, 0, is_q, MO_16, rn, rd);
11736         return;
11737     case 0x3d: /* FRECPE */
11738     case 0x3f: /* FRECPX */
11739         break;
11740     case 0x18: /* FRINTN */
11741         need_rmode = true;
11742         only_in_vector = true;
11743         rmode = FPROUNDING_TIEEVEN;
11744         break;
11745     case 0x19: /* FRINTM */
11746         need_rmode = true;
11747         only_in_vector = true;
11748         rmode = FPROUNDING_NEGINF;
11749         break;
11750     case 0x38: /* FRINTP */
11751         need_rmode = true;
11752         only_in_vector = true;
11753         rmode = FPROUNDING_POSINF;
11754         break;
11755     case 0x39: /* FRINTZ */
11756         need_rmode = true;
11757         only_in_vector = true;
11758         rmode = FPROUNDING_ZERO;
11759         break;
11760     case 0x58: /* FRINTA */
11761         need_rmode = true;
11762         only_in_vector = true;
11763         rmode = FPROUNDING_TIEAWAY;
11764         break;
11765     case 0x59: /* FRINTX */
11766     case 0x79: /* FRINTI */
11767         only_in_vector = true;
11768         /* current rounding mode */
11769         break;
11770     case 0x1a: /* FCVTNS */
11771         need_rmode = true;
11772         rmode = FPROUNDING_TIEEVEN;
11773         break;
11774     case 0x1b: /* FCVTMS */
11775         need_rmode = true;
11776         rmode = FPROUNDING_NEGINF;
11777         break;
11778     case 0x1c: /* FCVTAS */
11779         need_rmode = true;
11780         rmode = FPROUNDING_TIEAWAY;
11781         break;
11782     case 0x3a: /* FCVTPS */
11783         need_rmode = true;
11784         rmode = FPROUNDING_POSINF;
11785         break;
11786     case 0x3b: /* FCVTZS */
11787         need_rmode = true;
11788         rmode = FPROUNDING_ZERO;
11789         break;
11790     case 0x5a: /* FCVTNU */
11791         need_rmode = true;
11792         rmode = FPROUNDING_TIEEVEN;
11793         break;
11794     case 0x5b: /* FCVTMU */
11795         need_rmode = true;
11796         rmode = FPROUNDING_NEGINF;
11797         break;
11798     case 0x5c: /* FCVTAU */
11799         need_rmode = true;
11800         rmode = FPROUNDING_TIEAWAY;
11801         break;
11802     case 0x7a: /* FCVTPU */
11803         need_rmode = true;
11804         rmode = FPROUNDING_POSINF;
11805         break;
11806     case 0x7b: /* FCVTZU */
11807         need_rmode = true;
11808         rmode = FPROUNDING_ZERO;
11809         break;
11810     case 0x2f: /* FABS */
11811     case 0x6f: /* FNEG */
11812         need_fpst = false;
11813         break;
11814     case 0x7d: /* FRSQRTE */
11815     case 0x7f: /* FSQRT (vector) */
11816         break;
11817     default:
11818         fprintf(stderr, "%s: insn %#04x fpop %#2x\n", __func__, insn, fpop);
11819         g_assert_not_reached();
11820     }
11821
11822
11823     /* Check additional constraints for the scalar encoding */
11824     if (is_scalar) {
11825         if (!is_q) {
11826             unallocated_encoding(s);
11827             return;
11828         }
11829         /* FRINTxx is only in the vector form */
11830         if (only_in_vector) {
11831             unallocated_encoding(s);
11832             return;
11833         }
11834     }
11835
11836     if (!fp_access_check(s)) {
11837         return;
11838     }
11839
11840     if (need_rmode || need_fpst) {
11841         tcg_fpstatus = get_fpstatus_ptr(true);
11842     }
11843
11844     if (need_rmode) {
11845         tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
11846         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11847     }
11848
11849     if (is_scalar) {
11850         TCGv_i32 tcg_op = tcg_temp_new_i32();
11851         TCGv_i32 tcg_res = tcg_temp_new_i32();
11852
11853         read_vec_element_i32(s, tcg_op, rn, 0, MO_16);
11854
11855         switch (fpop) {
11856         case 0x1a: /* FCVTNS */
11857         case 0x1b: /* FCVTMS */
11858         case 0x1c: /* FCVTAS */
11859         case 0x3a: /* FCVTPS */
11860         case 0x3b: /* FCVTZS */
11861             gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11862             break;
11863         case 0x3d: /* FRECPE */
11864             gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11865             break;
11866         case 0x3f: /* FRECPX */
11867             gen_helper_frecpx_f16(tcg_res, tcg_op, tcg_fpstatus);
11868             break;
11869         case 0x5a: /* FCVTNU */
11870         case 0x5b: /* FCVTMU */
11871         case 0x5c: /* FCVTAU */
11872         case 0x7a: /* FCVTPU */
11873         case 0x7b: /* FCVTZU */
11874             gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11875             break;
11876         case 0x6f: /* FNEG */
11877             tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11878             break;
11879         case 0x7d: /* FRSQRTE */
11880             gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11881             break;
11882         default:
11883             g_assert_not_reached();
11884         }
11885
11886         /* limit any sign extension going on */
11887         tcg_gen_andi_i32(tcg_res, tcg_res, 0xffff);
11888         write_fp_sreg(s, rd, tcg_res);
11889
11890         tcg_temp_free_i32(tcg_res);
11891         tcg_temp_free_i32(tcg_op);
11892     } else {
11893         for (pass = 0; pass < (is_q ? 8 : 4); pass++) {
11894             TCGv_i32 tcg_op = tcg_temp_new_i32();
11895             TCGv_i32 tcg_res = tcg_temp_new_i32();
11896
11897             read_vec_element_i32(s, tcg_op, rn, pass, MO_16);
11898
11899             switch (fpop) {
11900             case 0x1a: /* FCVTNS */
11901             case 0x1b: /* FCVTMS */
11902             case 0x1c: /* FCVTAS */
11903             case 0x3a: /* FCVTPS */
11904             case 0x3b: /* FCVTZS */
11905                 gen_helper_advsimd_f16tosinth(tcg_res, tcg_op, tcg_fpstatus);
11906                 break;
11907             case 0x3d: /* FRECPE */
11908                 gen_helper_recpe_f16(tcg_res, tcg_op, tcg_fpstatus);
11909                 break;
11910             case 0x5a: /* FCVTNU */
11911             case 0x5b: /* FCVTMU */
11912             case 0x5c: /* FCVTAU */
11913             case 0x7a: /* FCVTPU */
11914             case 0x7b: /* FCVTZU */
11915                 gen_helper_advsimd_f16touinth(tcg_res, tcg_op, tcg_fpstatus);
11916                 break;
11917             case 0x18: /* FRINTN */
11918             case 0x19: /* FRINTM */
11919             case 0x38: /* FRINTP */
11920             case 0x39: /* FRINTZ */
11921             case 0x58: /* FRINTA */
11922             case 0x79: /* FRINTI */
11923                 gen_helper_advsimd_rinth(tcg_res, tcg_op, tcg_fpstatus);
11924                 break;
11925             case 0x59: /* FRINTX */
11926                 gen_helper_advsimd_rinth_exact(tcg_res, tcg_op, tcg_fpstatus);
11927                 break;
11928             case 0x2f: /* FABS */
11929                 tcg_gen_andi_i32(tcg_res, tcg_op, 0x7fff);
11930                 break;
11931             case 0x6f: /* FNEG */
11932                 tcg_gen_xori_i32(tcg_res, tcg_op, 0x8000);
11933                 break;
11934             case 0x7d: /* FRSQRTE */
11935                 gen_helper_rsqrte_f16(tcg_res, tcg_op, tcg_fpstatus);
11936                 break;
11937             case 0x7f: /* FSQRT */
11938                 gen_helper_sqrt_f16(tcg_res, tcg_op, tcg_fpstatus);
11939                 break;
11940             default:
11941                 g_assert_not_reached();
11942             }
11943
11944             write_vec_element_i32(s, tcg_res, rd, pass, MO_16);
11945
11946             tcg_temp_free_i32(tcg_res);
11947             tcg_temp_free_i32(tcg_op);
11948         }
11949
11950         clear_vec_high(s, is_q, rd);
11951     }
11952
11953     if (tcg_rmode) {
11954         gen_helper_set_rmode(tcg_rmode, tcg_rmode, tcg_fpstatus);
11955         tcg_temp_free_i32(tcg_rmode);
11956     }
11957
11958     if (tcg_fpstatus) {
11959         tcg_temp_free_ptr(tcg_fpstatus);
11960     }
11961 }
11962
11963 /* AdvSIMD scalar x indexed element
11964  *  31 30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
11965  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
11966  * | 0 1 | U | 1 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
11967  * +-----+---+-----------+------+---+---+------+-----+---+---+------+------+
11968  * AdvSIMD vector x indexed element
11969  *   31  30  29 28       24 23  22 21  20  19  16 15 12  11  10 9    5 4    0
11970  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
11971  * | 0 | Q | U | 0 1 1 1 1 | size | L | M |  Rm  | opc | H | 0 |  Rn  |  Rd  |
11972  * +---+---+---+-----------+------+---+---+------+-----+---+---+------+------+
11973  */
11974 static void disas_simd_indexed(DisasContext *s, uint32_t insn)
11975 {
11976     /* This encoding has two kinds of instruction:
11977      *  normal, where we perform elt x idxelt => elt for each
11978      *     element in the vector
11979      *  long, where we perform elt x idxelt and generate a result of
11980      *     double the width of the input element
11981      * The long ops have a 'part' specifier (ie come in INSN, INSN2 pairs).
11982      */
11983     bool is_scalar = extract32(insn, 28, 1);
11984     bool is_q = extract32(insn, 30, 1);
11985     bool u = extract32(insn, 29, 1);
11986     int size = extract32(insn, 22, 2);
11987     int l = extract32(insn, 21, 1);
11988     int m = extract32(insn, 20, 1);
11989     /* Note that the Rm field here is only 4 bits, not 5 as it usually is */
11990     int rm = extract32(insn, 16, 4);
11991     int opcode = extract32(insn, 12, 4);
11992     int h = extract32(insn, 11, 1);
11993     int rn = extract32(insn, 5, 5);
11994     int rd = extract32(insn, 0, 5);
11995     bool is_long = false;
11996     bool is_fp = false;
11997     bool is_fp16 = false;
11998     int index;
11999     TCGv_ptr fpst;
12000
12001     switch (16 * u + opcode) {
12002     case 0x08: /* MUL */
12003     case 0x10: /* MLA */
12004     case 0x14: /* MLS */
12005         if (is_scalar) {
12006             unallocated_encoding(s);
12007             return;
12008         }
12009         break;
12010     case 0x02: /* SMLAL, SMLAL2 */
12011     case 0x12: /* UMLAL, UMLAL2 */
12012     case 0x06: /* SMLSL, SMLSL2 */
12013     case 0x16: /* UMLSL, UMLSL2 */
12014     case 0x0a: /* SMULL, SMULL2 */
12015     case 0x1a: /* UMULL, UMULL2 */
12016         if (is_scalar) {
12017             unallocated_encoding(s);
12018             return;
12019         }
12020         is_long = true;
12021         break;
12022     case 0x03: /* SQDMLAL, SQDMLAL2 */
12023     case 0x07: /* SQDMLSL, SQDMLSL2 */
12024     case 0x0b: /* SQDMULL, SQDMULL2 */
12025         is_long = true;
12026         break;
12027     case 0x0c: /* SQDMULH */
12028     case 0x0d: /* SQRDMULH */
12029         break;
12030     case 0x01: /* FMLA */
12031     case 0x05: /* FMLS */
12032     case 0x09: /* FMUL */
12033     case 0x19: /* FMULX */
12034         is_fp = true;
12035         break;
12036     case 0x1d: /* SQRDMLAH */
12037     case 0x1f: /* SQRDMLSH */
12038         if (!arm_dc_feature(s, ARM_FEATURE_V8_RDM)) {
12039             unallocated_encoding(s);
12040             return;
12041         }
12042         break;
12043     default:
12044         unallocated_encoding(s);
12045         return;
12046     }
12047
12048     if (is_fp) {
12049         /* convert insn encoded size to TCGMemOp size */
12050         switch (size) {
12051         case 0: /* half-precision */
12052             if (!arm_dc_feature(s, ARM_FEATURE_V8_FP16)) {
12053                 unallocated_encoding(s);
12054                 return;
12055             }
12056             size = MO_16;
12057             break;
12058         case MO_32: /* single precision */
12059         case MO_64: /* double precision */
12060             break;
12061         default:
12062             unallocated_encoding(s);
12063             return;
12064         }
12065     } else {
12066         switch (size) {
12067         case MO_8:
12068         case MO_64:
12069             unallocated_encoding(s);
12070             return;
12071         }
12072     }
12073
12074     /* Given TCGMemOp size, adjust register and indexing.  */
12075     switch (size) {
12076     case MO_16:
12077         index = h << 2 | l << 1 | m;
12078         break;
12079     case MO_32:
12080         index = h << 1 | l;
12081         rm |= m << 4;
12082         break;
12083     case MO_64:
12084         if (l || !is_q) {
12085             unallocated_encoding(s);
12086             return;
12087         }
12088         index = h;
12089         rm |= m << 4;
12090         break;
12091     default:
12092         g_assert_not_reached();
12093     }
12094
12095     if (!fp_access_check(s)) {
12096         return;
12097     }
12098
12099     if (is_fp) {
12100         fpst = get_fpstatus_ptr(is_fp16);
12101     } else {
12102         fpst = NULL;
12103     }
12104
12105     if (size == 3) {
12106         TCGv_i64 tcg_idx = tcg_temp_new_i64();
12107         int pass;
12108
12109         assert(is_fp && is_q && !is_long);
12110
12111         read_vec_element(s, tcg_idx, rm, index, MO_64);
12112
12113         for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12114             TCGv_i64 tcg_op = tcg_temp_new_i64();
12115             TCGv_i64 tcg_res = tcg_temp_new_i64();
12116
12117             read_vec_element(s, tcg_op, rn, pass, MO_64);
12118
12119             switch (16 * u + opcode) {
12120             case 0x05: /* FMLS */
12121                 /* As usual for ARM, separate negation for fused multiply-add */
12122                 gen_helper_vfp_negd(tcg_op, tcg_op);
12123                 /* fall through */
12124             case 0x01: /* FMLA */
12125                 read_vec_element(s, tcg_res, rd, pass, MO_64);
12126                 gen_helper_vfp_muladdd(tcg_res, tcg_op, tcg_idx, tcg_res, fpst);
12127                 break;
12128             case 0x09: /* FMUL */
12129                 gen_helper_vfp_muld(tcg_res, tcg_op, tcg_idx, fpst);
12130                 break;
12131             case 0x19: /* FMULX */
12132                 gen_helper_vfp_mulxd(tcg_res, tcg_op, tcg_idx, fpst);
12133                 break;
12134             default:
12135                 g_assert_not_reached();
12136             }
12137
12138             write_vec_element(s, tcg_res, rd, pass, MO_64);
12139             tcg_temp_free_i64(tcg_op);
12140             tcg_temp_free_i64(tcg_res);
12141         }
12142
12143         tcg_temp_free_i64(tcg_idx);
12144         clear_vec_high(s, !is_scalar, rd);
12145     } else if (!is_long) {
12146         /* 32 bit floating point, or 16 or 32 bit integer.
12147          * For the 16 bit scalar case we use the usual Neon helpers and
12148          * rely on the fact that 0 op 0 == 0 with no side effects.
12149          */
12150         TCGv_i32 tcg_idx = tcg_temp_new_i32();
12151         int pass, maxpasses;
12152
12153         if (is_scalar) {
12154             maxpasses = 1;
12155         } else {
12156             maxpasses = is_q ? 4 : 2;
12157         }
12158
12159         read_vec_element_i32(s, tcg_idx, rm, index, size);
12160
12161         if (size == 1 && !is_scalar) {
12162             /* The simplest way to handle the 16x16 indexed ops is to duplicate
12163              * the index into both halves of the 32 bit tcg_idx and then use
12164              * the usual Neon helpers.
12165              */
12166             tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
12167         }
12168
12169         for (pass = 0; pass < maxpasses; pass++) {
12170             TCGv_i32 tcg_op = tcg_temp_new_i32();
12171             TCGv_i32 tcg_res = tcg_temp_new_i32();
12172
12173             read_vec_element_i32(s, tcg_op, rn, pass, is_scalar ? size : MO_32);
12174
12175             switch (16 * u + opcode) {
12176             case 0x08: /* MUL */
12177             case 0x10: /* MLA */
12178             case 0x14: /* MLS */
12179             {
12180                 static NeonGenTwoOpFn * const fns[2][2] = {
12181                     { gen_helper_neon_add_u16, gen_helper_neon_sub_u16 },
12182                     { tcg_gen_add_i32, tcg_gen_sub_i32 },
12183                 };
12184                 NeonGenTwoOpFn *genfn;
12185                 bool is_sub = opcode == 0x4;
12186
12187                 if (size == 1) {
12188                     gen_helper_neon_mul_u16(tcg_res, tcg_op, tcg_idx);
12189                 } else {
12190                     tcg_gen_mul_i32(tcg_res, tcg_op, tcg_idx);
12191                 }
12192                 if (opcode == 0x8) {
12193                     break;
12194                 }
12195                 read_vec_element_i32(s, tcg_op, rd, pass, MO_32);
12196                 genfn = fns[size - 1][is_sub];
12197                 genfn(tcg_res, tcg_op, tcg_res);
12198                 break;
12199             }
12200             case 0x05: /* FMLS */
12201             case 0x01: /* FMLA */
12202                 read_vec_element_i32(s, tcg_res, rd, pass,
12203                                      is_scalar ? size : MO_32);
12204                 switch (size) {
12205                 case 1:
12206                     if (opcode == 0x5) {
12207                         /* As usual for ARM, separate negation for fused
12208                          * multiply-add */
12209                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80008000);
12210                     }
12211                     if (is_scalar) {
12212                         gen_helper_advsimd_muladdh(tcg_res, tcg_op, tcg_idx,
12213                                                    tcg_res, fpst);
12214                     } else {
12215                         gen_helper_advsimd_muladd2h(tcg_res, tcg_op, tcg_idx,
12216                                                     tcg_res, fpst);
12217                     }
12218                     break;
12219                 case 2:
12220                     if (opcode == 0x5) {
12221                         /* As usual for ARM, separate negation for
12222                          * fused multiply-add */
12223                         tcg_gen_xori_i32(tcg_op, tcg_op, 0x80000000);
12224                     }
12225                     gen_helper_vfp_muladds(tcg_res, tcg_op, tcg_idx,
12226                                            tcg_res, fpst);
12227                     break;
12228                 default:
12229                     g_assert_not_reached();
12230                 }
12231                 break;
12232             case 0x09: /* FMUL */
12233                 switch (size) {
12234                 case 1:
12235                     if (is_scalar) {
12236                         gen_helper_advsimd_mulh(tcg_res, tcg_op,
12237                                                 tcg_idx, fpst);
12238                     } else {
12239                         gen_helper_advsimd_mul2h(tcg_res, tcg_op,
12240                                                  tcg_idx, fpst);
12241                     }
12242                     break;
12243                 case 2:
12244                     gen_helper_vfp_muls(tcg_res, tcg_op, tcg_idx, fpst);
12245                     break;
12246                 default:
12247                     g_assert_not_reached();
12248                 }
12249                 break;
12250             case 0x19: /* FMULX */
12251                 switch (size) {
12252                 case 1:
12253                     if (is_scalar) {
12254                         gen_helper_advsimd_mulxh(tcg_res, tcg_op,
12255                                                  tcg_idx, fpst);
12256                     } else {
12257                         gen_helper_advsimd_mulx2h(tcg_res, tcg_op,
12258                                                   tcg_idx, fpst);
12259                     }
12260                     break;
12261                 case 2:
12262                     gen_helper_vfp_mulxs(tcg_res, tcg_op, tcg_idx, fpst);
12263                     break;
12264                 default:
12265                     g_assert_not_reached();
12266                 }
12267                 break;
12268             case 0x0c: /* SQDMULH */
12269                 if (size == 1) {
12270                     gen_helper_neon_qdmulh_s16(tcg_res, cpu_env,
12271                                                tcg_op, tcg_idx);
12272                 } else {
12273                     gen_helper_neon_qdmulh_s32(tcg_res, cpu_env,
12274                                                tcg_op, tcg_idx);
12275                 }
12276                 break;
12277             case 0x0d: /* SQRDMULH */
12278                 if (size == 1) {
12279                     gen_helper_neon_qrdmulh_s16(tcg_res, cpu_env,
12280                                                 tcg_op, tcg_idx);
12281                 } else {
12282                     gen_helper_neon_qrdmulh_s32(tcg_res, cpu_env,
12283                                                 tcg_op, tcg_idx);
12284                 }
12285                 break;
12286             case 0x1d: /* SQRDMLAH */
12287                 read_vec_element_i32(s, tcg_res, rd, pass,
12288                                      is_scalar ? size : MO_32);
12289                 if (size == 1) {
12290                     gen_helper_neon_qrdmlah_s16(tcg_res, cpu_env,
12291                                                 tcg_op, tcg_idx, tcg_res);
12292                 } else {
12293                     gen_helper_neon_qrdmlah_s32(tcg_res, cpu_env,
12294                                                 tcg_op, tcg_idx, tcg_res);
12295                 }
12296                 break;
12297             case 0x1f: /* SQRDMLSH */
12298                 read_vec_element_i32(s, tcg_res, rd, pass,
12299                                      is_scalar ? size : MO_32);
12300                 if (size == 1) {
12301                     gen_helper_neon_qrdmlsh_s16(tcg_res, cpu_env,
12302                                                 tcg_op, tcg_idx, tcg_res);
12303                 } else {
12304                     gen_helper_neon_qrdmlsh_s32(tcg_res, cpu_env,
12305                                                 tcg_op, tcg_idx, tcg_res);
12306                 }
12307                 break;
12308             default:
12309                 g_assert_not_reached();
12310             }
12311
12312             if (is_scalar) {
12313                 write_fp_sreg(s, rd, tcg_res);
12314             } else {
12315                 write_vec_element_i32(s, tcg_res, rd, pass, MO_32);
12316             }
12317
12318             tcg_temp_free_i32(tcg_op);
12319             tcg_temp_free_i32(tcg_res);
12320         }
12321
12322         tcg_temp_free_i32(tcg_idx);
12323         clear_vec_high(s, is_q, rd);
12324     } else {
12325         /* long ops: 16x16->32 or 32x32->64 */
12326         TCGv_i64 tcg_res[2];
12327         int pass;
12328         bool satop = extract32(opcode, 0, 1);
12329         TCGMemOp memop = MO_32;
12330
12331         if (satop || !u) {
12332             memop |= MO_SIGN;
12333         }
12334
12335         if (size == 2) {
12336             TCGv_i64 tcg_idx = tcg_temp_new_i64();
12337
12338             read_vec_element(s, tcg_idx, rm, index, memop);
12339
12340             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12341                 TCGv_i64 tcg_op = tcg_temp_new_i64();
12342                 TCGv_i64 tcg_passres;
12343                 int passelt;
12344
12345                 if (is_scalar) {
12346                     passelt = 0;
12347                 } else {
12348                     passelt = pass + (is_q * 2);
12349                 }
12350
12351                 read_vec_element(s, tcg_op, rn, passelt, memop);
12352
12353                 tcg_res[pass] = tcg_temp_new_i64();
12354
12355                 if (opcode == 0xa || opcode == 0xb) {
12356                     /* Non-accumulating ops */
12357                     tcg_passres = tcg_res[pass];
12358                 } else {
12359                     tcg_passres = tcg_temp_new_i64();
12360                 }
12361
12362                 tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
12363                 tcg_temp_free_i64(tcg_op);
12364
12365                 if (satop) {
12366                     /* saturating, doubling */
12367                     gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
12368                                                       tcg_passres, tcg_passres);
12369                 }
12370
12371                 if (opcode == 0xa || opcode == 0xb) {
12372                     continue;
12373                 }
12374
12375                 /* Accumulating op: handle accumulate step */
12376                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12377
12378                 switch (opcode) {
12379                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
12380                     tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
12381                     break;
12382                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
12383                     tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
12384                     break;
12385                 case 0x7: /* SQDMLSL, SQDMLSL2 */
12386                     tcg_gen_neg_i64(tcg_passres, tcg_passres);
12387                     /* fall through */
12388                 case 0x3: /* SQDMLAL, SQDMLAL2 */
12389                     gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
12390                                                       tcg_res[pass],
12391                                                       tcg_passres);
12392                     break;
12393                 default:
12394                     g_assert_not_reached();
12395                 }
12396                 tcg_temp_free_i64(tcg_passres);
12397             }
12398             tcg_temp_free_i64(tcg_idx);
12399
12400             clear_vec_high(s, !is_scalar, rd);
12401         } else {
12402             TCGv_i32 tcg_idx = tcg_temp_new_i32();
12403
12404             assert(size == 1);
12405             read_vec_element_i32(s, tcg_idx, rm, index, size);
12406
12407             if (!is_scalar) {
12408                 /* The simplest way to handle the 16x16 indexed ops is to
12409                  * duplicate the index into both halves of the 32 bit tcg_idx
12410                  * and then use the usual Neon helpers.
12411                  */
12412                 tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
12413             }
12414
12415             for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) {
12416                 TCGv_i32 tcg_op = tcg_temp_new_i32();
12417                 TCGv_i64 tcg_passres;
12418
12419                 if (is_scalar) {
12420                     read_vec_element_i32(s, tcg_op, rn, pass, size);
12421                 } else {
12422                     read_vec_element_i32(s, tcg_op, rn,
12423                                          pass + (is_q * 2), MO_32);
12424                 }
12425
12426                 tcg_res[pass] = tcg_temp_new_i64();
12427
12428                 if (opcode == 0xa || opcode == 0xb) {
12429                     /* Non-accumulating ops */
12430                     tcg_passres = tcg_res[pass];
12431                 } else {
12432                     tcg_passres = tcg_temp_new_i64();
12433                 }
12434
12435                 if (memop & MO_SIGN) {
12436                     gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
12437                 } else {
12438                     gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
12439                 }
12440                 if (satop) {
12441                     gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
12442                                                       tcg_passres, tcg_passres);
12443                 }
12444                 tcg_temp_free_i32(tcg_op);
12445
12446                 if (opcode == 0xa || opcode == 0xb) {
12447                     continue;
12448                 }
12449
12450                 /* Accumulating op: handle accumulate step */
12451                 read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12452
12453                 switch (opcode) {
12454                 case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
12455                     gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
12456                                              tcg_passres);
12457                     break;
12458                 case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
12459                     gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
12460                                              tcg_passres);
12461                     break;
12462                 case 0x7: /* SQDMLSL, SQDMLSL2 */
12463                     gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
12464                     /* fall through */
12465                 case 0x3: /* SQDMLAL, SQDMLAL2 */
12466                     gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
12467                                                       tcg_res[pass],
12468                                                       tcg_passres);
12469                     break;
12470                 default:
12471                     g_assert_not_reached();
12472                 }
12473                 tcg_temp_free_i64(tcg_passres);
12474             }
12475             tcg_temp_free_i32(tcg_idx);
12476
12477             if (is_scalar) {
12478                 tcg_gen_ext32u_i64(tcg_res[0], tcg_res[0]);
12479             }
12480         }
12481
12482         if (is_scalar) {
12483             tcg_res[1] = tcg_const_i64(0);
12484         }
12485
12486         for (pass = 0; pass < 2; pass++) {
12487             write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
12488             tcg_temp_free_i64(tcg_res[pass]);
12489         }
12490     }
12491
12492     if (fpst) {
12493         tcg_temp_free_ptr(fpst);
12494     }
12495 }
12496
12497 /* Crypto AES
12498  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
12499  * +-----------------+------+-----------+--------+-----+------+------+
12500  * | 0 1 0 0 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12501  * +-----------------+------+-----------+--------+-----+------+------+
12502  */
12503 static void disas_crypto_aes(DisasContext *s, uint32_t insn)
12504 {
12505     int size = extract32(insn, 22, 2);
12506     int opcode = extract32(insn, 12, 5);
12507     int rn = extract32(insn, 5, 5);
12508     int rd = extract32(insn, 0, 5);
12509     int decrypt;
12510     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
12511     TCGv_i32 tcg_decrypt;
12512     CryptoThreeOpIntFn *genfn;
12513
12514     if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
12515         || size != 0) {
12516         unallocated_encoding(s);
12517         return;
12518     }
12519
12520     switch (opcode) {
12521     case 0x4: /* AESE */
12522         decrypt = 0;
12523         genfn = gen_helper_crypto_aese;
12524         break;
12525     case 0x6: /* AESMC */
12526         decrypt = 0;
12527         genfn = gen_helper_crypto_aesmc;
12528         break;
12529     case 0x5: /* AESD */
12530         decrypt = 1;
12531         genfn = gen_helper_crypto_aese;
12532         break;
12533     case 0x7: /* AESIMC */
12534         decrypt = 1;
12535         genfn = gen_helper_crypto_aesmc;
12536         break;
12537     default:
12538         unallocated_encoding(s);
12539         return;
12540     }
12541
12542     if (!fp_access_check(s)) {
12543         return;
12544     }
12545
12546     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12547     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12548     tcg_decrypt = tcg_const_i32(decrypt);
12549
12550     genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_decrypt);
12551
12552     tcg_temp_free_ptr(tcg_rd_ptr);
12553     tcg_temp_free_ptr(tcg_rn_ptr);
12554     tcg_temp_free_i32(tcg_decrypt);
12555 }
12556
12557 /* Crypto three-reg SHA
12558  *  31             24 23  22  21 20  16  15 14    12 11 10 9    5 4    0
12559  * +-----------------+------+---+------+---+--------+-----+------+------+
12560  * | 0 1 0 1 1 1 1 0 | size | 0 |  Rm  | 0 | opcode | 0 0 |  Rn  |  Rd  |
12561  * +-----------------+------+---+------+---+--------+-----+------+------+
12562  */
12563 static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
12564 {
12565     int size = extract32(insn, 22, 2);
12566     int opcode = extract32(insn, 12, 3);
12567     int rm = extract32(insn, 16, 5);
12568     int rn = extract32(insn, 5, 5);
12569     int rd = extract32(insn, 0, 5);
12570     CryptoThreeOpFn *genfn;
12571     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
12572     int feature = ARM_FEATURE_V8_SHA256;
12573
12574     if (size != 0) {
12575         unallocated_encoding(s);
12576         return;
12577     }
12578
12579     switch (opcode) {
12580     case 0: /* SHA1C */
12581     case 1: /* SHA1P */
12582     case 2: /* SHA1M */
12583     case 3: /* SHA1SU0 */
12584         genfn = NULL;
12585         feature = ARM_FEATURE_V8_SHA1;
12586         break;
12587     case 4: /* SHA256H */
12588         genfn = gen_helper_crypto_sha256h;
12589         break;
12590     case 5: /* SHA256H2 */
12591         genfn = gen_helper_crypto_sha256h2;
12592         break;
12593     case 6: /* SHA256SU1 */
12594         genfn = gen_helper_crypto_sha256su1;
12595         break;
12596     default:
12597         unallocated_encoding(s);
12598         return;
12599     }
12600
12601     if (!arm_dc_feature(s, feature)) {
12602         unallocated_encoding(s);
12603         return;
12604     }
12605
12606     if (!fp_access_check(s)) {
12607         return;
12608     }
12609
12610     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12611     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12612     tcg_rm_ptr = vec_full_reg_ptr(s, rm);
12613
12614     if (genfn) {
12615         genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
12616     } else {
12617         TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
12618
12619         gen_helper_crypto_sha1_3reg(tcg_rd_ptr, tcg_rn_ptr,
12620                                     tcg_rm_ptr, tcg_opcode);
12621         tcg_temp_free_i32(tcg_opcode);
12622     }
12623
12624     tcg_temp_free_ptr(tcg_rd_ptr);
12625     tcg_temp_free_ptr(tcg_rn_ptr);
12626     tcg_temp_free_ptr(tcg_rm_ptr);
12627 }
12628
12629 /* Crypto two-reg SHA
12630  *  31             24 23  22 21       17 16    12 11 10 9    5 4    0
12631  * +-----------------+------+-----------+--------+-----+------+------+
12632  * | 0 1 0 1 1 1 1 0 | size | 1 0 1 0 0 | opcode | 1 0 |  Rn  |  Rd  |
12633  * +-----------------+------+-----------+--------+-----+------+------+
12634  */
12635 static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
12636 {
12637     int size = extract32(insn, 22, 2);
12638     int opcode = extract32(insn, 12, 5);
12639     int rn = extract32(insn, 5, 5);
12640     int rd = extract32(insn, 0, 5);
12641     CryptoTwoOpFn *genfn;
12642     int feature;
12643     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
12644
12645     if (size != 0) {
12646         unallocated_encoding(s);
12647         return;
12648     }
12649
12650     switch (opcode) {
12651     case 0: /* SHA1H */
12652         feature = ARM_FEATURE_V8_SHA1;
12653         genfn = gen_helper_crypto_sha1h;
12654         break;
12655     case 1: /* SHA1SU1 */
12656         feature = ARM_FEATURE_V8_SHA1;
12657         genfn = gen_helper_crypto_sha1su1;
12658         break;
12659     case 2: /* SHA256SU0 */
12660         feature = ARM_FEATURE_V8_SHA256;
12661         genfn = gen_helper_crypto_sha256su0;
12662         break;
12663     default:
12664         unallocated_encoding(s);
12665         return;
12666     }
12667
12668     if (!arm_dc_feature(s, feature)) {
12669         unallocated_encoding(s);
12670         return;
12671     }
12672
12673     if (!fp_access_check(s)) {
12674         return;
12675     }
12676
12677     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12678     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12679
12680     genfn(tcg_rd_ptr, tcg_rn_ptr);
12681
12682     tcg_temp_free_ptr(tcg_rd_ptr);
12683     tcg_temp_free_ptr(tcg_rn_ptr);
12684 }
12685
12686 /* Crypto three-reg SHA512
12687  *  31                   21 20  16 15  14  13 12  11  10  9    5 4    0
12688  * +-----------------------+------+---+---+-----+--------+------+------+
12689  * | 1 1 0 0 1 1 1 0 0 1 1 |  Rm  | 1 | O | 0 0 | opcode |  Rn  |  Rd  |
12690  * +-----------------------+------+---+---+-----+--------+------+------+
12691  */
12692 static void disas_crypto_three_reg_sha512(DisasContext *s, uint32_t insn)
12693 {
12694     int opcode = extract32(insn, 10, 2);
12695     int o =  extract32(insn, 14, 1);
12696     int rm = extract32(insn, 16, 5);
12697     int rn = extract32(insn, 5, 5);
12698     int rd = extract32(insn, 0, 5);
12699     int feature;
12700     CryptoThreeOpFn *genfn;
12701
12702     if (o == 0) {
12703         switch (opcode) {
12704         case 0: /* SHA512H */
12705             feature = ARM_FEATURE_V8_SHA512;
12706             genfn = gen_helper_crypto_sha512h;
12707             break;
12708         case 1: /* SHA512H2 */
12709             feature = ARM_FEATURE_V8_SHA512;
12710             genfn = gen_helper_crypto_sha512h2;
12711             break;
12712         case 2: /* SHA512SU1 */
12713             feature = ARM_FEATURE_V8_SHA512;
12714             genfn = gen_helper_crypto_sha512su1;
12715             break;
12716         case 3: /* RAX1 */
12717             feature = ARM_FEATURE_V8_SHA3;
12718             genfn = NULL;
12719             break;
12720         }
12721     } else {
12722         switch (opcode) {
12723         case 0: /* SM3PARTW1 */
12724             feature = ARM_FEATURE_V8_SM3;
12725             genfn = gen_helper_crypto_sm3partw1;
12726             break;
12727         case 1: /* SM3PARTW2 */
12728             feature = ARM_FEATURE_V8_SM3;
12729             genfn = gen_helper_crypto_sm3partw2;
12730             break;
12731         case 2: /* SM4EKEY */
12732             feature = ARM_FEATURE_V8_SM4;
12733             genfn = gen_helper_crypto_sm4ekey;
12734             break;
12735         default:
12736             unallocated_encoding(s);
12737             return;
12738         }
12739     }
12740
12741     if (!arm_dc_feature(s, feature)) {
12742         unallocated_encoding(s);
12743         return;
12744     }
12745
12746     if (!fp_access_check(s)) {
12747         return;
12748     }
12749
12750     if (genfn) {
12751         TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
12752
12753         tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12754         tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12755         tcg_rm_ptr = vec_full_reg_ptr(s, rm);
12756
12757         genfn(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr);
12758
12759         tcg_temp_free_ptr(tcg_rd_ptr);
12760         tcg_temp_free_ptr(tcg_rn_ptr);
12761         tcg_temp_free_ptr(tcg_rm_ptr);
12762     } else {
12763         TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
12764         int pass;
12765
12766         tcg_op1 = tcg_temp_new_i64();
12767         tcg_op2 = tcg_temp_new_i64();
12768         tcg_res[0] = tcg_temp_new_i64();
12769         tcg_res[1] = tcg_temp_new_i64();
12770
12771         for (pass = 0; pass < 2; pass++) {
12772             read_vec_element(s, tcg_op1, rn, pass, MO_64);
12773             read_vec_element(s, tcg_op2, rm, pass, MO_64);
12774
12775             tcg_gen_rotli_i64(tcg_res[pass], tcg_op2, 1);
12776             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
12777         }
12778         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
12779         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
12780
12781         tcg_temp_free_i64(tcg_op1);
12782         tcg_temp_free_i64(tcg_op2);
12783         tcg_temp_free_i64(tcg_res[0]);
12784         tcg_temp_free_i64(tcg_res[1]);
12785     }
12786 }
12787
12788 /* Crypto two-reg SHA512
12789  *  31                                     12  11  10  9    5 4    0
12790  * +-----------------------------------------+--------+------+------+
12791  * | 1 1 0 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 0 0 | opcode |  Rn  |  Rd  |
12792  * +-----------------------------------------+--------+------+------+
12793  */
12794 static void disas_crypto_two_reg_sha512(DisasContext *s, uint32_t insn)
12795 {
12796     int opcode = extract32(insn, 10, 2);
12797     int rn = extract32(insn, 5, 5);
12798     int rd = extract32(insn, 0, 5);
12799     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr;
12800     int feature;
12801     CryptoTwoOpFn *genfn;
12802
12803     switch (opcode) {
12804     case 0: /* SHA512SU0 */
12805         feature = ARM_FEATURE_V8_SHA512;
12806         genfn = gen_helper_crypto_sha512su0;
12807         break;
12808     case 1: /* SM4E */
12809         feature = ARM_FEATURE_V8_SM4;
12810         genfn = gen_helper_crypto_sm4e;
12811         break;
12812     default:
12813         unallocated_encoding(s);
12814         return;
12815     }
12816
12817     if (!arm_dc_feature(s, feature)) {
12818         unallocated_encoding(s);
12819         return;
12820     }
12821
12822     if (!fp_access_check(s)) {
12823         return;
12824     }
12825
12826     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
12827     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
12828
12829     genfn(tcg_rd_ptr, tcg_rn_ptr);
12830
12831     tcg_temp_free_ptr(tcg_rd_ptr);
12832     tcg_temp_free_ptr(tcg_rn_ptr);
12833 }
12834
12835 /* Crypto four-register
12836  *  31               23 22 21 20  16 15  14  10 9    5 4    0
12837  * +-------------------+-----+------+---+------+------+------+
12838  * | 1 1 0 0 1 1 1 0 0 | Op0 |  Rm  | 0 |  Ra  |  Rn  |  Rd  |
12839  * +-------------------+-----+------+---+------+------+------+
12840  */
12841 static void disas_crypto_four_reg(DisasContext *s, uint32_t insn)
12842 {
12843     int op0 = extract32(insn, 21, 2);
12844     int rm = extract32(insn, 16, 5);
12845     int ra = extract32(insn, 10, 5);
12846     int rn = extract32(insn, 5, 5);
12847     int rd = extract32(insn, 0, 5);
12848     int feature;
12849
12850     switch (op0) {
12851     case 0: /* EOR3 */
12852     case 1: /* BCAX */
12853         feature = ARM_FEATURE_V8_SHA3;
12854         break;
12855     case 2: /* SM3SS1 */
12856         feature = ARM_FEATURE_V8_SM3;
12857         break;
12858     default:
12859         unallocated_encoding(s);
12860         return;
12861     }
12862
12863     if (!arm_dc_feature(s, feature)) {
12864         unallocated_encoding(s);
12865         return;
12866     }
12867
12868     if (!fp_access_check(s)) {
12869         return;
12870     }
12871
12872     if (op0 < 2) {
12873         TCGv_i64 tcg_op1, tcg_op2, tcg_op3, tcg_res[2];
12874         int pass;
12875
12876         tcg_op1 = tcg_temp_new_i64();
12877         tcg_op2 = tcg_temp_new_i64();
12878         tcg_op3 = tcg_temp_new_i64();
12879         tcg_res[0] = tcg_temp_new_i64();
12880         tcg_res[1] = tcg_temp_new_i64();
12881
12882         for (pass = 0; pass < 2; pass++) {
12883             read_vec_element(s, tcg_op1, rn, pass, MO_64);
12884             read_vec_element(s, tcg_op2, rm, pass, MO_64);
12885             read_vec_element(s, tcg_op3, ra, pass, MO_64);
12886
12887             if (op0 == 0) {
12888                 /* EOR3 */
12889                 tcg_gen_xor_i64(tcg_res[pass], tcg_op2, tcg_op3);
12890             } else {
12891                 /* BCAX */
12892                 tcg_gen_andc_i64(tcg_res[pass], tcg_op2, tcg_op3);
12893             }
12894             tcg_gen_xor_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
12895         }
12896         write_vec_element(s, tcg_res[0], rd, 0, MO_64);
12897         write_vec_element(s, tcg_res[1], rd, 1, MO_64);
12898
12899         tcg_temp_free_i64(tcg_op1);
12900         tcg_temp_free_i64(tcg_op2);
12901         tcg_temp_free_i64(tcg_op3);
12902         tcg_temp_free_i64(tcg_res[0]);
12903         tcg_temp_free_i64(tcg_res[1]);
12904     } else {
12905         TCGv_i32 tcg_op1, tcg_op2, tcg_op3, tcg_res, tcg_zero;
12906
12907         tcg_op1 = tcg_temp_new_i32();
12908         tcg_op2 = tcg_temp_new_i32();
12909         tcg_op3 = tcg_temp_new_i32();
12910         tcg_res = tcg_temp_new_i32();
12911         tcg_zero = tcg_const_i32(0);
12912
12913         read_vec_element_i32(s, tcg_op1, rn, 3, MO_32);
12914         read_vec_element_i32(s, tcg_op2, rm, 3, MO_32);
12915         read_vec_element_i32(s, tcg_op3, ra, 3, MO_32);
12916
12917         tcg_gen_rotri_i32(tcg_res, tcg_op1, 20);
12918         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op2);
12919         tcg_gen_add_i32(tcg_res, tcg_res, tcg_op3);
12920         tcg_gen_rotri_i32(tcg_res, tcg_res, 25);
12921
12922         write_vec_element_i32(s, tcg_zero, rd, 0, MO_32);
12923         write_vec_element_i32(s, tcg_zero, rd, 1, MO_32);
12924         write_vec_element_i32(s, tcg_zero, rd, 2, MO_32);
12925         write_vec_element_i32(s, tcg_res, rd, 3, MO_32);
12926
12927         tcg_temp_free_i32(tcg_op1);
12928         tcg_temp_free_i32(tcg_op2);
12929         tcg_temp_free_i32(tcg_op3);
12930         tcg_temp_free_i32(tcg_res);
12931         tcg_temp_free_i32(tcg_zero);
12932     }
12933 }
12934
12935 /* Crypto XAR
12936  *  31                   21 20  16 15    10 9    5 4    0
12937  * +-----------------------+------+--------+------+------+
12938  * | 1 1 0 0 1 1 1 0 1 0 0 |  Rm  |  imm6  |  Rn  |  Rd  |
12939  * +-----------------------+------+--------+------+------+
12940  */
12941 static void disas_crypto_xar(DisasContext *s, uint32_t insn)
12942 {
12943     int rm = extract32(insn, 16, 5);
12944     int imm6 = extract32(insn, 10, 6);
12945     int rn = extract32(insn, 5, 5);
12946     int rd = extract32(insn, 0, 5);
12947     TCGv_i64 tcg_op1, tcg_op2, tcg_res[2];
12948     int pass;
12949
12950     if (!arm_dc_feature(s, ARM_FEATURE_V8_SHA3)) {
12951         unallocated_encoding(s);
12952         return;
12953     }
12954
12955     if (!fp_access_check(s)) {
12956         return;
12957     }
12958
12959     tcg_op1 = tcg_temp_new_i64();
12960     tcg_op2 = tcg_temp_new_i64();
12961     tcg_res[0] = tcg_temp_new_i64();
12962     tcg_res[1] = tcg_temp_new_i64();
12963
12964     for (pass = 0; pass < 2; pass++) {
12965         read_vec_element(s, tcg_op1, rn, pass, MO_64);
12966         read_vec_element(s, tcg_op2, rm, pass, MO_64);
12967
12968         tcg_gen_xor_i64(tcg_res[pass], tcg_op1, tcg_op2);
12969         tcg_gen_rotri_i64(tcg_res[pass], tcg_res[pass], imm6);
12970     }
12971     write_vec_element(s, tcg_res[0], rd, 0, MO_64);
12972     write_vec_element(s, tcg_res[1], rd, 1, MO_64);
12973
12974     tcg_temp_free_i64(tcg_op1);
12975     tcg_temp_free_i64(tcg_op2);
12976     tcg_temp_free_i64(tcg_res[0]);
12977     tcg_temp_free_i64(tcg_res[1]);
12978 }
12979
12980 /* Crypto three-reg imm2
12981  *  31                   21 20  16 15  14 13 12  11  10  9    5 4    0
12982  * +-----------------------+------+-----+------+--------+------+------+
12983  * | 1 1 0 0 1 1 1 0 0 1 0 |  Rm  | 1 0 | imm2 | opcode |  Rn  |  Rd  |
12984  * +-----------------------+------+-----+------+--------+------+------+
12985  */
12986 static void disas_crypto_three_reg_imm2(DisasContext *s, uint32_t insn)
12987 {
12988     int opcode = extract32(insn, 10, 2);
12989     int imm2 = extract32(insn, 12, 2);
12990     int rm = extract32(insn, 16, 5);
12991     int rn = extract32(insn, 5, 5);
12992     int rd = extract32(insn, 0, 5);
12993     TCGv_ptr tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr;
12994     TCGv_i32 tcg_imm2, tcg_opcode;
12995
12996     if (!arm_dc_feature(s, ARM_FEATURE_V8_SM3)) {
12997         unallocated_encoding(s);
12998         return;
12999     }
13000
13001     if (!fp_access_check(s)) {
13002         return;
13003     }
13004
13005     tcg_rd_ptr = vec_full_reg_ptr(s, rd);
13006     tcg_rn_ptr = vec_full_reg_ptr(s, rn);
13007     tcg_rm_ptr = vec_full_reg_ptr(s, rm);
13008     tcg_imm2   = tcg_const_i32(imm2);
13009     tcg_opcode = tcg_const_i32(opcode);
13010
13011     gen_helper_crypto_sm3tt(tcg_rd_ptr, tcg_rn_ptr, tcg_rm_ptr, tcg_imm2,
13012                             tcg_opcode);
13013
13014     tcg_temp_free_ptr(tcg_rd_ptr);
13015     tcg_temp_free_ptr(tcg_rn_ptr);
13016     tcg_temp_free_ptr(tcg_rm_ptr);
13017     tcg_temp_free_i32(tcg_imm2);
13018     tcg_temp_free_i32(tcg_opcode);
13019 }
13020
/* C3.6 Data processing - SIMD, inc Crypto
 *
 * As the decode gets a little complex we are using a table based
 * approach for this part of the decode.
 *
 * Each entry is { pattern, mask, fn }.  The table is handed to
 * lookup_disas_fn() by disas_data_proc_simd().
 * NOTE(review): presumably an entry matches when (insn & mask) == pattern
 * and the first matching entry wins -- the ordering note on
 * disas_simd_mod_imm below depends on that; confirm against
 * lookup_disas_fn().
 */
static const AArch64DecodeTable data_proc_simd[] = {
    /* pattern  ,  mask     ,  fn                        */
    { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
    { 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
    { 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
    { 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
    { 0x0e300800, 0x9f3e0c00, disas_simd_across_lanes },
    { 0x0e000400, 0x9fe08400, disas_simd_copy },
    { 0x0f000000, 0x9f000400, disas_simd_indexed }, /* vector indexed */
    /* simd_mod_imm decode is a subset of simd_shift_imm, so must precede it */
    { 0x0f000400, 0x9ff80400, disas_simd_mod_imm },
    { 0x0f000400, 0x9f800400, disas_simd_shift_imm },
    { 0x0e000000, 0xbf208c00, disas_simd_tb },
    { 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
    { 0x2e000000, 0xbf208400, disas_simd_ext },
    { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
    { 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
    { 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
    { 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
    { 0x5e300800, 0xdf3e0c00, disas_simd_scalar_pairwise },
    { 0x5e000400, 0xdfe08400, disas_simd_scalar_copy },
    { 0x5f000000, 0xdf000400, disas_simd_indexed }, /* scalar indexed */
    { 0x5f000400, 0xdf800400, disas_simd_scalar_shift_imm },
    /* Crypto extension encodings */
    { 0x4e280800, 0xff3e0c00, disas_crypto_aes },
    { 0x5e000000, 0xff208c00, disas_crypto_three_reg_sha },
    { 0x5e280800, 0xff3e0c00, disas_crypto_two_reg_sha },
    { 0xce608000, 0xffe0b000, disas_crypto_three_reg_sha512 },
    { 0xcec08000, 0xfffff000, disas_crypto_two_reg_sha512 },
    { 0xce000000, 0xff808000, disas_crypto_four_reg },
    { 0xce800000, 0xffe00000, disas_crypto_xar },
    { 0xce408000, 0xffe0c000, disas_crypto_three_reg_imm2 },
    /* Half-precision (fp16) variants */
    { 0x0e400400, 0x9f60c400, disas_simd_three_reg_same_fp16 },
    { 0x0e780800, 0x8f7e0c00, disas_simd_two_reg_misc_fp16 },
    { 0x5e400400, 0xdf60c400, disas_simd_scalar_three_reg_same_fp16 },
    /* All-zero entry with a NULL fn ends the table */
    { 0x00000000, 0x00000000, NULL }
};
13062
13063 static void disas_data_proc_simd(DisasContext *s, uint32_t insn)
13064 {
13065     /* Note that this is called with all non-FP cases from
13066      * table C3-6 so it must UNDEF for entries not specifically
13067      * allocated to instructions in that table.
13068      */
13069     AArch64DecodeFn *fn = lookup_disas_fn(&data_proc_simd[0], insn);
13070     if (fn) {
13071         fn(s, insn);
13072     } else {
13073         unallocated_encoding(s);
13074     }
13075 }
13076
13077 /* C3.6 Data processing - SIMD and floating point */
13078 static void disas_data_proc_simd_fp(DisasContext *s, uint32_t insn)
13079 {
13080     if (extract32(insn, 28, 1) == 1 && extract32(insn, 30, 1) == 0) {
13081         disas_data_proc_fp(s, insn);
13082     } else {
13083         /* SIMD, including crypto */
13084         disas_data_proc_simd(s, insn);
13085     }
13086 }
13087
13088 /* C3.1 A64 instruction index by encoding */
13089 static void disas_a64_insn(CPUARMState *env, DisasContext *s)
13090 {
13091     uint32_t insn;
13092
13093     insn = arm_ldl_code(env, s->pc, s->sctlr_b);
13094     s->insn = insn;
13095     s->pc += 4;
13096
13097     s->fp_access_checked = false;
13098
13099     switch (extract32(insn, 25, 4)) {
13100     case 0x0: case 0x1: case 0x2: case 0x3: /* UNALLOCATED */
13101         unallocated_encoding(s);
13102         break;
13103     case 0x8: case 0x9: /* Data processing - immediate */
13104         disas_data_proc_imm(s, insn);
13105         break;
13106     case 0xa: case 0xb: /* Branch, exception generation and system insns */
13107         disas_b_exc_sys(s, insn);
13108         break;
13109     case 0x4:
13110     case 0x6:
13111     case 0xc:
13112     case 0xe:      /* Loads and stores */
13113         disas_ldst(s, insn);
13114         break;
13115     case 0x5:
13116     case 0xd:      /* Data processing - register */
13117         disas_data_proc_reg(s, insn);
13118         break;
13119     case 0x7:
13120     case 0xf:      /* Data processing - SIMD and floating point */
13121         disas_data_proc_simd_fp(s, insn);
13122         break;
13123     default:
13124         assert(FALSE); /* all 15 cases should be handled above */
13125         break;
13126     }
13127
13128     /* if we allocated any temporaries, free them here */
13129     free_tmp_a64(s);
13130 }
13131
13132 static int aarch64_tr_init_disas_context(DisasContextBase *dcbase,
13133                                          CPUState *cpu, int max_insns)
13134 {
13135     DisasContext *dc = container_of(dcbase, DisasContext, base);
13136     CPUARMState *env = cpu->env_ptr;
13137     ARMCPU *arm_cpu = arm_env_get_cpu(env);
13138     int bound;
13139
13140     dc->pc = dc->base.pc_first;
13141     dc->condjmp = 0;
13142
13143     dc->aarch64 = 1;
13144     /* If we are coming from secure EL0 in a system with a 32-bit EL3, then
13145      * there is no secure EL1, so we route exceptions to EL3.
13146      */
13147     dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
13148                                !arm_el_is_aa64(env, 3);
13149     dc->thumb = 0;
13150     dc->sctlr_b = 0;
13151     dc->be_data = ARM_TBFLAG_BE_DATA(dc->base.tb->flags) ? MO_BE : MO_LE;
13152     dc->condexec_mask = 0;
13153     dc->condexec_cond = 0;
13154     dc->mmu_idx = core_to_arm_mmu_idx(env, ARM_TBFLAG_MMUIDX(dc->base.tb->flags));
13155     dc->tbi0 = ARM_TBFLAG_TBI0(dc->base.tb->flags);
13156     dc->tbi1 = ARM_TBFLAG_TBI1(dc->base.tb->flags);
13157     dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
13158 #if !defined(CONFIG_USER_ONLY)
13159     dc->user = (dc->current_el == 0);
13160 #endif
13161     dc->fp_excp_el = ARM_TBFLAG_FPEXC_EL(dc->base.tb->flags);
13162     dc->sve_excp_el = ARM_TBFLAG_SVEEXC_EL(dc->base.tb->flags);
13163     dc->sve_len = (ARM_TBFLAG_ZCR_LEN(dc->base.tb->flags) + 1) * 16;
13164     dc->vec_len = 0;
13165     dc->vec_stride = 0;
13166     dc->cp_regs = arm_cpu->cp_regs;
13167     dc->features = env->features;
13168
13169     /* Single step state. The code-generation logic here is:
13170      *  SS_ACTIVE == 0:
13171      *   generate code with no special handling for single-stepping (except
13172      *   that anything that can make us go to SS_ACTIVE == 1 must end the TB;
13173      *   this happens anyway because those changes are all system register or
13174      *   PSTATE writes).
13175      *  SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
13176      *   emit code for one insn
13177      *   emit code to clear PSTATE.SS
13178      *   emit code to generate software step exception for completed step
13179      *   end TB (as usual for having generated an exception)
13180      *  SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
13181      *   emit code to generate a software step exception
13182      *   end the TB
13183      */
13184     dc->ss_active = ARM_TBFLAG_SS_ACTIVE(dc->base.tb->flags);
13185     dc->pstate_ss = ARM_TBFLAG_PSTATE_SS(dc->base.tb->flags);
13186     dc->is_ldex = false;
13187     dc->ss_same_el = (arm_debug_target_el(env) == dc->current_el);
13188
13189     /* Bound the number of insns to execute to those left on the page.  */
13190     bound = -(dc->base.pc_first | TARGET_PAGE_MASK) / 4;
13191
13192     /* If architectural single step active, limit to 1.  */
13193     if (dc->ss_active) {
13194         bound = 1;
13195     }
13196     max_insns = MIN(max_insns, bound);
13197
13198     init_tmp_a64_array(dc);
13199
13200     return max_insns;
13201 }
13202
/* tb_start hook for the AArch64 translator.
 *
 * Only calls tcg_clear_temp_count(); neither @db nor @cpu is used.
 */
static void aarch64_tr_tb_start(DisasContextBase *db, CPUState *cpu)
{
    tcg_clear_temp_count();
}
13207
13208 static void aarch64_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu)
13209 {
13210     DisasContext *dc = container_of(dcbase, DisasContext, base);
13211
13212     tcg_gen_insn_start(dc->pc, 0, 0);
13213     dc->insn_start = tcg_last_op();
13214 }
13215
13216 static bool aarch64_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu,
13217                                         const CPUBreakpoint *bp)
13218 {
13219     DisasContext *dc = container_of(dcbase, DisasContext, base);
13220
13221     if (bp->flags & BP_CPU) {
13222         gen_a64_set_pc_im(dc->pc);
13223         gen_helper_check_breakpoints(cpu_env);
13224         /* End the TB early; it likely won't be executed */
13225         dc->base.is_jmp = DISAS_TOO_MANY;
13226     } else {
13227         gen_exception_internal_insn(dc, 0, EXCP_DEBUG);
13228         /* The address covered by the breakpoint must be
13229            included in [tb->pc, tb->pc + tb->size) in order
13230            to for it to be properly cleared -- thus we
13231            increment the PC here so that the logic setting
13232            tb->size below does the right thing.  */
13233         dc->pc += 4;
13234         dc->base.is_jmp = DISAS_NORETURN;
13235     }
13236
13237     return true;
13238 }
13239
13240 static void aarch64_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu)
13241 {
13242     DisasContext *dc = container_of(dcbase, DisasContext, base);
13243     CPUARMState *env = cpu->env_ptr;
13244
13245     if (dc->ss_active && !dc->pstate_ss) {
13246         /* Singlestep state is Active-pending.
13247          * If we're in this state at the start of a TB then either
13248          *  a) we just took an exception to an EL which is being debugged
13249          *     and this is the first insn in the exception handler
13250          *  b) debug exceptions were masked and we just unmasked them
13251          *     without changing EL (eg by clearing PSTATE.D)
13252          * In either case we're going to take a swstep exception in the
13253          * "did not step an insn" case, and so the syndrome ISV and EX
13254          * bits should be zero.
13255          */
13256         assert(dc->base.num_insns == 1);
13257         gen_exception(EXCP_UDEF, syn_swstep(dc->ss_same_el, 0, 0),
13258                       default_exception_el(dc));
13259         dc->base.is_jmp = DISAS_NORETURN;
13260     } else {
13261         disas_a64_insn(env, dc);
13262     }
13263
13264     dc->base.pc_next = dc->pc;
13265     translator_loop_temp_check(&dc->base);
13266 }
13267
13268 static void aarch64_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu)
13269 {
13270     DisasContext *dc = container_of(dcbase, DisasContext, base);
13271
13272     if (unlikely(dc->base.singlestep_enabled || dc->ss_active)) {
13273         /* Note that this means single stepping WFI doesn't halt the CPU.
13274          * For conditional branch insns this is harmless unreachable code as
13275          * gen_goto_tb() has already handled emitting the debug exception
13276          * (and thus a tb-jump is not possible when singlestepping).
13277          */
13278         switch (dc->base.is_jmp) {
13279         default:
13280             gen_a64_set_pc_im(dc->pc);
13281             /* fall through */
13282         case DISAS_EXIT:
13283         case DISAS_JUMP:
13284             if (dc->base.singlestep_enabled) {
13285                 gen_exception_internal(EXCP_DEBUG);
13286             } else {
13287                 gen_step_complete_exception(dc);
13288             }
13289             break;
13290         case DISAS_NORETURN:
13291             break;
13292         }
13293     } else {
13294         switch (dc->base.is_jmp) {
13295         case DISAS_NEXT:
13296         case DISAS_TOO_MANY:
13297             gen_goto_tb(dc, 1, dc->pc);
13298             break;
13299         default:
13300         case DISAS_UPDATE:
13301             gen_a64_set_pc_im(dc->pc);
13302             /* fall through */
13303         case DISAS_JUMP:
13304             tcg_gen_lookup_and_goto_ptr();
13305             break;
13306         case DISAS_EXIT:
13307             tcg_gen_exit_tb(0);
13308             break;
13309         case DISAS_NORETURN:
13310         case DISAS_SWI:
13311             break;
13312         case DISAS_WFE:
13313             gen_a64_set_pc_im(dc->pc);
13314             gen_helper_wfe(cpu_env);
13315             break;
13316         case DISAS_YIELD:
13317             gen_a64_set_pc_im(dc->pc);
13318             gen_helper_yield(cpu_env);
13319             break;
13320         case DISAS_WFI:
13321         {
13322             /* This is a special case because we don't want to just halt the CPU
13323              * if trying to debug across a WFI.
13324              */
13325             TCGv_i32 tmp = tcg_const_i32(4);
13326
13327             gen_a64_set_pc_im(dc->pc);
13328             gen_helper_wfi(cpu_env, tmp);
13329             tcg_temp_free_i32(tmp);
13330             /* The helper doesn't necessarily throw an exception, but we
13331              * must go back to the main loop to check for interrupts anyway.
13332              */
13333             tcg_gen_exit_tb(0);
13334             break;
13335         }
13336         }
13337     }
13338
13339     /* Functions above can change dc->pc, so re-align db->pc_next */
13340     dc->base.pc_next = dc->pc;
13341 }
13342
13343 static void aarch64_tr_disas_log(const DisasContextBase *dcbase,
13344                                       CPUState *cpu)
13345 {
13346     DisasContext *dc = container_of(dcbase, DisasContext, base);
13347
13348     qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
13349     log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
13350 }
13351
/* Translator hook table for AArch64: binds each TranslatorOps callback
 * to the aarch64_tr_* function of the same name defined above.
 */
const TranslatorOps aarch64_translator_ops = {
    .init_disas_context = aarch64_tr_init_disas_context,
    .tb_start           = aarch64_tr_tb_start,
    .insn_start         = aarch64_tr_insn_start,
    .breakpoint_check   = aarch64_tr_breakpoint_check,
    .translate_insn     = aarch64_tr_translate_insn,
    .tb_stop            = aarch64_tr_tb_stop,
    .disas_log          = aarch64_tr_disas_log,
};